diff --git a/generate/generate.py b/generate/generate.py
index 3c795baceaf701dcfd0dda1414128003a383ae8b..69f0fec35dfd92ecb06015a842af8bfb97a968c3 100644
--- a/generate/generate.py
+++ b/generate/generate.py
@@ -4,6 +4,7 @@ import os
 import sys
 from typing import Any, Dict, List, Union
 from multiprocessing import Pool
+
 if sys.version_info >= (3, 11):
     import tomllib
 else:
@@ -89,15 +90,21 @@ def main() -> None:
     os.makedirs(args.output, exist_ok=True)
 
     if not args.cmake_only:
-        with (open(args.filename, "rb") as f):
+        with open(args.filename, "rb") as f:
             toml_dict = tomllib.load(f)
             toml_dict = unfold_toml_dict(toml_dict)
 
             with Pool(args.processes) as pool:
+                async_results = []
                 for form_str, operators in toml_dict.items():
                     for spec in operators:
-                        ret = pool.apply_async(generate_operator, (args, form_str, spec))
-                        ret.get()
+                        ret = pool.apply_async(
+                            generate_operator, (args, form_str, spec)
+                        )
+                        async_results.append(ret)
+                # Wait for every task; get() re-raises any exception raised in a worker.
+                for ar in async_results:
+                    ar.get()
                 pool.close()
                 pool.join()
 
diff --git a/operators.toml b/operators.toml
index 5ca3fce1312d4ac7372a5254ce59b003d58179d1..c9e6aa466f46e17f76fba1d84276c937495b240c 100644
--- a/operators.toml
+++ b/operators.toml
@@ -22,6 +22,15 @@ quadrature    = 2
 loop-strategy = "cubes"
 optimizations = ["moveconstants", "vectorize"]
 
+[[diffusion]]
+trial-space   = "P2"
+test-space    = "P2"
+dimensions    = [2]
+quadrature    = 3
+loop-strategy = "sawtooth"
+optimizations = ["moveconstants", "vectorize"]
+blending      = "AnnulusMap"
+
 [[diffusion]]
 trial-space   = "P2"
 test-space    = "P2"
@@ -57,6 +66,15 @@ quadrature    = 2
 loop-strategy = "sawtooth"
 optimizations = ["moveconstants", "vectorize", "quadloops", "tabulate"]
 
+[[divergence]]
+trial-space   = "P2"
+test-space    = "P1"
+components = [0, 1, 2]
+dimensions    = [2, 3]
+quadrature    = 2
+loop-strategy = "cubes"
+optimizations = ["moveconstants", "vectorize"]
+
 [[k_mass]]
 trial-space   = "P1"
 test-space    = "P1"
@@ -66,6 +84,16 @@ quadrature    = 3
 loop-strategy = "sawtooth"
 optimizations = ["moveconstants", "vectorize", "quadloops", "tabulate"]
 
+[[k_mass]]
+trial-space   = "P1"
+test-space    = "P1"
+form-space-args.coefficient_function_space = "P1"
+dimensions    = [2]
+quadrature    = 4
+loop-strategy = "sawtooth"
+optimizations = ["moveconstants", "vectorize", "quadloops", "tabulate"]
+blending      = "AnnulusMap"
+
 [[k_mass]]
 trial-space   = "P1"
 test-space    = "P1"
@@ -79,11 +107,12 @@ blending      = "IcosahedralShellMap"
 [[divergence]]
 trial-space   = "P2"
 test-space    = "P1"
-components = [0, 1, 2]
-dimensions    = [2, 3]
+components = [0, 1]
+dimensions    = [2]
 quadrature    = 2
-loop-strategy = "cubes"
+loop-strategy = "sawtooth"
 optimizations = ["moveconstants", "vectorize"]
+blending      = "AnnulusMap"
 
 [[divergence]]
 trial-space   = "P2"
@@ -104,6 +133,16 @@ quadrature    = 2
 loop-strategy = "cubes"
 optimizations = ["moveconstants", "vectorize"]
 
+[[gradient]]
+trial-space   = "P1"
+test-space    = "P2"
+components = [0, 1]
+dimensions    = [2]
+quadrature    = 2
+loop-strategy = "sawtooth"
+optimizations = ["moveconstants", "vectorize"]
+blending      = "AnnulusMap"
+
 [[gradient]]
 trial-space   = "P1"
 test-space    = "P2"
@@ -125,6 +164,18 @@ quadrature      = 2
 loop-strategy   = "sawtooth"
 optimizations   = ["moveconstants", "vectorize", "quadloops", "tabulate"]
 
+[[epsilon]]
+trial-space               = "P2"
+test-space                = "P2"
+components-trial = [0, 1]
+components-test  = [0, 1]
+form-space-args.coefficient_function_space = "P2"
+dimensions      = [2]
+quadrature      = 3
+loop-strategy   = "sawtooth"
+optimizations   = ["moveconstants", "vectorize", "quadloops", "tabulate"]
+blending        = "AnnulusMap"
+
 [[epsilon]]
 trial-space               = "P2"
 test-space                = "P2"
@@ -148,6 +199,18 @@ quadrature      = 2
 loop-strategy   = "sawtooth"
 optimizations   = ["moveconstants", "vectorize", "quadloops", "tabulate"]
 
+[[full_stokes]]
+trial-space               = "P2"
+test-space                = "P2"
+components-trial = [0, 1]
+components-test  = [0, 1]
+form-space-args.coefficient_function_space = "P2"
+dimensions      = [2]
+quadrature      = 3
+loop-strategy   = "sawtooth"
+optimizations   = ["moveconstants", "vectorize", "quadloops", "tabulate"]
+blending        = "AnnulusMap"
+
 [[full_stokes]]
 trial-space               = "P2"
 test-space                = "P2"
@@ -158,4 +221,4 @@ dimensions      = [3]
 quadrature      = 3
 loop-strategy   = "sawtooth"
 optimizations   = ["moveconstants", "vectorize", "quadloops", "tabulate"]
-blending        = "IcosahedralShellMap"
+blending        = "IcosahedralShellMap"
\ No newline at end of file
diff --git a/operators/diffusion/CMakeLists.txt b/operators/diffusion/CMakeLists.txt
index 41dc248b32d05491c8c4a6e5e330e1146a5baf70..89d33aa9a3b8ac707365596968b9e8c635358134 100644
--- a/operators/diffusion/CMakeLists.txt
+++ b/operators/diffusion/CMakeLists.txt
@@ -4,6 +4,8 @@ add_library( opgen-diffusion
    P1ElementwiseDiffusion.hpp
    P2ElementwiseDiffusion.cpp
    P2ElementwiseDiffusion.hpp
+   P2ElementwiseDiffusionAnnulusMap.cpp
+   P2ElementwiseDiffusionAnnulusMap.hpp
    P2ElementwiseDiffusionIcosahedralShellMap.cpp
    P2ElementwiseDiffusionIcosahedralShellMap.hpp
 )
@@ -15,6 +17,8 @@ if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
       avx/P1ElementwiseDiffusion_apply_macro_3D.cpp
       avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp
       avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp
+      avx/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp
+      avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
       avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp
       avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
       avx/P2ElementwiseDiffusion_apply_macro_2D.cpp
@@ -23,6 +27,7 @@ if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
       avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp
       noarch/P1ElementwiseDiffusion_toMatrix_macro_2D.cpp
       noarch/P1ElementwiseDiffusion_toMatrix_macro_3D.cpp
+      noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_macro_2D.cpp
       noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_macro_3D.cpp
       noarch/P2ElementwiseDiffusion_toMatrix_macro_2D.cpp
       noarch/P2ElementwiseDiffusion_toMatrix_macro_3D.cpp
@@ -34,6 +39,8 @@ if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
       avx/P1ElementwiseDiffusion_apply_macro_3D.cpp
       avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp
       avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp
+      avx/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp
+      avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
       avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp
       avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
       avx/P2ElementwiseDiffusion_apply_macro_2D.cpp
@@ -56,6 +63,9 @@ else()
       noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp
       noarch/P1ElementwiseDiffusion_toMatrix_macro_2D.cpp
       noarch/P1ElementwiseDiffusion_toMatrix_macro_3D.cpp
+      noarch/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp
+      noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
+      noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_macro_2D.cpp
       noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp
       noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
       noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_macro_3D.cpp
diff --git a/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.cpp b/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..567b03cee2c932fa00c6571bde0c0a2ee7e7f017
--- /dev/null
+++ b/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.cpp
@@ -0,0 +1,380 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (maybe
+// (related to) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) causing a
+// warning in an internal standard library header (bits/stl_algobase.h). As a
+// workaround, we disable the warning and include this header indirectly through
+// a public header.
+#include <waLBerlaDefinitions.h>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnonnull"
+#endif
+#include <cmath>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#include "P2ElementwiseDiffusionAnnulusMap.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+P2ElementwiseDiffusionAnnulusMap::P2ElementwiseDiffusionAnnulusMap( const std::shared_ptr< PrimitiveStorage >& storage,
+                                                                    size_t                                     minLevel,
+                                                                    size_t                                     maxLevel )
+: Operator( storage, minLevel, maxLevel ) // all arguments are forwarded to the Operator base; no further setup happens here
+{}
+
+void P2ElementwiseDiffusionAnnulusMap::apply( const P2Function< real_t >& src, // function whose face data the kernel reads
+                                              const P2Function< real_t >& dst, // function whose face data is written (const handle, data is modified)
+                                              uint_t                      level, // refinement level whose data is used
+                                              DoFType                     flag, // passed to dst.interpolate() and, as All ^ flag, to the additive communication
+                                              UpdateType                  updateType ) const // Replace zeroes the flagged dst entries first
+{
+   this->startTiming( "apply" );
+   // Runs the generated apply_macro_2D kernel on every macro-face and pushes the result to edges and vertices; storages with cells abort.
+   // Make sure that halos are up-to-date
+   this->timingTree_->start( "pre-communication" );
+   if ( this->storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH );
+   }
+   this->timingTree_->stop( "pre-communication" );
+
+   if ( updateType == Replace )
+   {
+      // We need to zero the destination array (including halos).
+      // However, we must not zero out anything that is not flagged with the specified BCs.
+      // Therefore, we first zero out everything that is flagged, and then, later,
+      // the halos of the highest dim primitives.
+      dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data in the functions
+         real_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         // Zero out dst halos only
+         //
+         // This is also necessary when using update type == Add.
+         // During additive comm we then skip zeroing the data on the lower-dim primitives.
+         for ( const auto& idx : vertexdof::macroface::Iterator( level ) )
+         {
+            if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) )
+            {
+               auto arrayIdx             = vertexdof::macroface::index( level, idx.x(), idx.y() );
+               _data_dstVertex[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+            }
+         }
+         for ( const auto& idx : edgedof::macroface::Iterator( level ) )
+         {
+            for ( const auto& orientation : edgedof::faceLocalEdgeDoFOrientations )
+            {
+               if ( !edgedof::macroface::isInnerEdgeDoF( level, idx, orientation ) )
+               {
+                  auto arrayIdx           = edgedof::macroface::index( level, idx.x(), idx.y(), orientation );
+                  _data_dstEdge[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+               }
+            }
+         }
+         // Kernel arguments: micro-edge count of this level (integer and real_t) and the 2D coordinates of the three macro-vertices.
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         WALBERLA_CHECK_NOT_NULLPTR(
+             std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ),
+             "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." ) // NOTE(review): message says "macro-cell" but this loop visits macro-faces
+         real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex();
+         real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex();
+         real_t refVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0];
+         real_t rayVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0];
+         real_t thrVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0];
+         real_t refVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1];
+         real_t rayVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1];
+         real_t thrVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1];
+         // NOTE(review): the same dynamic_pointer_cast is repeated for every value above; caching it once would be a generator-side change.
+         this->timingTree_->start( "kernel" );
+
+         apply_macro_2D(
+
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+
+      // Push result to lower-dimensional primitives
+      //
+      this->timingTree_->start( "post-communication" );
+      // Note: We could avoid communication here by implementing the apply() also for the respective
+      //       lower dimensional primitives!
+      dst.getVertexDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getVertexDoFFunction().communicateAdditively< Face, Vertex >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getEdgeDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      this->timingTree_->stop( "post-communication" );
+   }
+
+   this->stopTiming( "apply" );
+}
+void P2ElementwiseDiffusionAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat, // matrix proxy handed to the kernel
+                                                 const P2Function< idx_t >&                  src, // index-valued function; its face data is passed to the kernel
+                                                 const P2Function< idx_t >&                  dst, // index-valued function; its face data is passed to the kernel
+                                                 uint_t                                      level, // refinement level whose data is used
+                                                 DoFType                                     flag ) const // currently ignored; a warning is logged unless it equals All
+{
+   this->startTiming( "toMatrix" );
+   // Hands mat and the src/dst index data of every macro-face to the generated toMatrix_macro_2D kernel; storages with cells abort.
+   // We currently ignore the flag provided!
+   if ( flag != All )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      this->timingTree_->start( "pre-communication" );
+
+      this->timingTree_->stop( "pre-communication" );
+
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      this->timingTree_->start( "pre-communication" );
+      // Nothing is communicated here; the empty timer pair is presumably kept for a uniform timing tree.
+      this->timingTree_->stop( "pre-communication" );
+
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data
+         idx_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t* _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t* _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         // Kernel arguments: micro-edge count of this level (integer and real_t) and the 2D coordinates of the three macro-vertices.
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         WALBERLA_CHECK_NOT_NULLPTR(
+             std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ),
+             "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." ) // NOTE(review): message says "macro-cell" but this loop visits macro-faces
+         real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex();
+         real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex();
+         real_t refVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0];
+         real_t rayVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0];
+         real_t thrVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0];
+         real_t refVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1];
+         real_t rayVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1];
+         real_t thrVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1];
+         // NOTE(review): the same dynamic_pointer_cast is repeated for every value above; caching it once would be a generator-side change.
+         this->timingTree_->start( "kernel" );
+
+         toMatrix_macro_2D(
+
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             mat,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+   }
+   this->stopTiming( "toMatrix" );
+}
+void P2ElementwiseDiffusionAnnulusMap::computeInverseDiagonalOperatorValues()
+{
+   this->startTiming( "computeInverseDiagonalOperatorValues" );
+   // Per level in [minLevel_, maxLevel_]: zero invDiag_, run the generated kernel on every macro-face, communicate additively, invert elementwise.
+   if ( invDiag_ == nullptr )
+   {
+      invDiag_ = std::make_shared< P2Function< real_t > >( "inverse diagonal entries", storage_, minLevel_, maxLevel_ );
+   }
+
+   for ( uint_t level = minLevel_; level <= maxLevel_; level++ )
+   {
+      invDiag_->setToZero( level );
+
+      if ( storage_->hasGlobalCells() )
+      {
+         this->timingTree_->start( "pre-communication" );
+
+         this->timingTree_->stop( "pre-communication" );
+
+         WALBERLA_ABORT( "Not implemented." );
+      }
+      else
+      {
+         this->timingTree_->start( "pre-communication" );
+         // Nothing is communicated here; the empty timer pair is presumably kept for a uniform timing tree.
+         this->timingTree_->stop( "pre-communication" );
+
+         for ( auto& it : storage_->getFaces() )
+         {
+            Face& face = *it.second;
+
+            // get hold of the actual numerical data
+            real_t* _data_invDiag_Vertex =
+                face.getData( ( *invDiag_ ).getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_invDiag_Edge = face.getData( ( *invDiag_ ).getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            // Kernel arguments: micro-edge count of this level (integer and real_t) and the 2D coordinates of the three macro-vertices.
+            const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+            const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+            const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+            const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+            const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+            const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+            const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+            const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+            WALBERLA_CHECK_NOT_NULLPTR(
+                std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ),
+                "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." ) // NOTE(review): message says "macro-cell" but this loop visits macro-faces
+            real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex();
+            real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex();
+            real_t refVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0];
+            real_t rayVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0];
+            real_t thrVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0];
+            real_t refVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1];
+            real_t rayVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1];
+            real_t thrVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1];
+            // NOTE(review): the same dynamic_pointer_cast is repeated for every value above; caching it once would be a generator-side change.
+            this->timingTree_->start( "kernel" );
+
+            computeInverseDiagonalOperatorValues_macro_2D(
+
+                _data_invDiag_Edge,
+                _data_invDiag_Vertex,
+                macro_vertex_coord_id_0comp0,
+                macro_vertex_coord_id_0comp1,
+                macro_vertex_coord_id_1comp0,
+                macro_vertex_coord_id_1comp1,
+                macro_vertex_coord_id_2comp0,
+                macro_vertex_coord_id_2comp1,
+                micro_edges_per_macro_edge,
+                micro_edges_per_macro_edge_float,
+                radRayVertex,
+                radRefVertex,
+                rayVertex_0,
+                rayVertex_1,
+                refVertex_0,
+                refVertex_1,
+                thrVertex_0,
+                thrVertex_1 );
+            this->timingTree_->stop( "kernel" );
+         }
+
+         // Push result to lower-dimensional primitives
+         //
+         this->timingTree_->start( "post-communication" );
+         // Note: We could avoid communication here by implementing this computation also for the respective
+         //       lower dimensional primitives!
+         ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Edge >( level );
+         ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Vertex >( level );
+         ( *invDiag_ ).getEdgeDoFFunction().communicateAdditively< Face, Edge >( level );
+         this->timingTree_->stop( "post-communication" );
+      }
+      // invDiag_ presumably holds the plain diagonal at this point; the elementwise inversion below turns it into the inverse diagonal.
+      ( *invDiag_ ).invertElementwise( level );
+   }
+
+   this->stopTiming( "computeInverseDiagonalOperatorValues" );
+}
+std::shared_ptr< P2Function< real_t > > P2ElementwiseDiffusionAnnulusMap::getInverseDiagonalValues() const
+{
+   return invDiag_; // shared handle, not a copy; may still be nullptr if computeInverseDiagonalOperatorValues() has not run - TODO confirm initial value
+}
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.hpp b/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ddba212d6f94836ee76ad2d632781b4fbc37490d
--- /dev/null
+++ b/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.hpp
@@ -0,0 +1,165 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+#include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/communication/Syncing.hpp"
+#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
+#include "hyteg/geometry/AnnulusMap.hpp"
+#include "hyteg/operators/Operator.hpp"
+#include "hyteg/p2functionspace/P2Function.hpp"
+#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
+#include "hyteg/solvers/Smoothables.hpp"
+#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+/// Diffusion operator without coefficients.
+///
+/// Geometry map: AnnulusMap
+///
+/// Weak formulation
+///
+///     u: trial function (space: Lagrange, degree: 2)
+///     v: test function  (space: Lagrange, degree: 2)
+///
+///     ∫ ∇u · ∇v
+
+class P2ElementwiseDiffusionAnnulusMap : public Operator< P2Function< real_t >, P2Function< real_t > >,
+                                         public OperatorWithInverseDiagonal< P2Function< real_t > >
+{
+ public:
+   P2ElementwiseDiffusionAnnulusMap( const std::shared_ptr< PrimitiveStorage >& storage, size_t minLevel, size_t maxLevel );
+
+   void apply( const P2Function< real_t >& src,
+               const P2Function< real_t >& dst,
+               uint_t                      level,
+               DoFType                     flag,
+               UpdateType                  updateType = Replace ) const;
+
+   void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                  const P2Function< idx_t >&                  src,
+                  const P2Function< idx_t >&                  dst,
+                  uint_t                                      level,
+                  DoFType                                     flag ) const;
+
+   void computeInverseDiagonalOperatorValues();
+
+   std::shared_ptr< P2Function< real_t > > getInverseDiagonalValues() const;
+
+ protected:
+ private:
+   /// Kernel type: apply
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    446     738      18      16      4              0                 0              1
+   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
+                        real_t* RESTRICT _data_dstVertex,
+                        real_t* RESTRICT _data_srcEdge,
+                        real_t* RESTRICT _data_srcVertex,
+                        real_t           macro_vertex_coord_id_0comp0,
+                        real_t           macro_vertex_coord_id_0comp1,
+                        real_t           macro_vertex_coord_id_1comp0,
+                        real_t           macro_vertex_coord_id_1comp1,
+                        real_t           macro_vertex_coord_id_2comp0,
+                        real_t           macro_vertex_coord_id_2comp1,
+                        int64_t          micro_edges_per_macro_edge,
+                        real_t           micro_edges_per_macro_edge_float,
+                        real_t           radRayVertex,
+                        real_t           radRefVertex,
+                        real_t           rayVertex_0,
+                        real_t           rayVertex_1,
+                        real_t           refVertex_0,
+                        real_t           refVertex_1,
+                        real_t           thrVertex_0,
+                        real_t           thrVertex_1 ) const;
+   /// Kernel type: toMatrix (same geometry scalars as the apply kernel, but idx_t arrays and a SparseMatrixProxy argument)
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    410     702      18      16      4              0                 0              4
+   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge, // idx_t (not real_t): presumably DoF indices used as matrix row/column ids - confirm in the kernel .cpp
+                           idx_t* RESTRICT                      _data_dstVertex,
+                           idx_t* RESTRICT                      _data_srcEdge,
+                           idx_t* RESTRICT                      _data_srcVertex,
+                           real_t                               macro_vertex_coord_id_0comp0,
+                           real_t                               macro_vertex_coord_id_0comp1,
+                           real_t                               macro_vertex_coord_id_1comp0,
+                           real_t                               macro_vertex_coord_id_1comp1,
+                           real_t                               macro_vertex_coord_id_2comp0,
+                           real_t                               macro_vertex_coord_id_2comp1,
+                           std::shared_ptr< SparseMatrixProxy > mat, // matrix proxy handed to the kernel; presumably receives the assembled entries - confirm
+                           int64_t                              micro_edges_per_macro_edge,
+                           real_t                               micro_edges_per_macro_edge_float,
+                           real_t                               radRayVertex,
+                           real_t                               radRefVertex,
+                           real_t                               rayVertex_0,
+                           real_t                               rayVertex_1,
+                           real_t                               refVertex_0,
+                           real_t                               refVertex_1,
+                           real_t                               thrVertex_0,
+                           real_t                               thrVertex_1 ) const;
+   /// Kernel type: computeInverseDiagonalOperatorValues (no src arrays: only the invDiag edge/vertex arrays plus geometry scalars)
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    311     522      18      16      4              0                 0              1
+   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, // invDiag arrays (edge / vertex): presumably this kernel's output - confirm in the kernel .cpp
+                                                       real_t* RESTRICT _data_invDiag_Vertex,
+                                                       real_t           macro_vertex_coord_id_0comp0,
+                                                       real_t           macro_vertex_coord_id_0comp1,
+                                                       real_t           macro_vertex_coord_id_1comp0,
+                                                       real_t           macro_vertex_coord_id_1comp1,
+                                                       real_t           macro_vertex_coord_id_2comp0,
+                                                       real_t           macro_vertex_coord_id_2comp1,
+                                                       int64_t          micro_edges_per_macro_edge,
+                                                       real_t           micro_edges_per_macro_edge_float,
+                                                       real_t           radRayVertex,
+                                                       real_t           radRefVertex,
+                                                       real_t           rayVertex_0,
+                                                       real_t           rayVertex_1,
+                                                       real_t           refVertex_0,
+                                                       real_t           refVertex_1,
+                                                       real_t           thrVertex_0,
+                                                       real_t           thrVertex_1 ) const;
+
+   std::shared_ptr< P2Function< real_t > > invDiag_;
+};
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5d1b71591a4db99091930b0685c3c8c65a65c86f
--- /dev/null
+++ b/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp
@@ -0,0 +1,1357 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseDiffusionAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseDiffusionAnnulusMap::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_kernel_op_0 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_kernel_op_1 = -tmp_kernel_op_0;
+       const real_t tmp_kernel_op_15 = rayVertex_0 - refVertex_0;
+       const real_t tmp_kernel_op_16 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_kernel_op_17 = -tmp_kernel_op_16;
+       const real_t tmp_kernel_op_18 = rayVertex_1 - refVertex_1;
+       const real_t tmp_kernel_op_19 = radRayVertex - radRefVertex;
+       const real_t tmp_kernel_op_20 = -tmp_kernel_op_19*1.0 / (-tmp_kernel_op_1*tmp_kernel_op_15 + tmp_kernel_op_17*tmp_kernel_op_18);
+       const real_t tmp_kernel_op_21 = tmp_kernel_op_20*1.0;
+       const real_t tmp_kernel_op_25 = -rayVertex_1;
+       const real_t tmp_kernel_op_26 = -rayVertex_0;
+       const real_t tmp_kernel_op_35 = 1.3333333333333333;
+       const real_t tmp_kernel_op_36 = 1.3333333333333333;
+       const real_t tmp_kernel_op_37 = tmp_kernel_op_35 + tmp_kernel_op_36 - 3.0;
+       const real_t tmp_kernel_op_50 = tmp_kernel_op_19*1.0 / (tmp_kernel_op_0*tmp_kernel_op_15 - tmp_kernel_op_16*tmp_kernel_op_18);
+       const real_t tmp_kernel_op_51 = tmp_kernel_op_50*1.0;
+       const real_t tmp_kernel_op_77 = 0.80000000000000004;
+       const real_t tmp_kernel_op_78 = 2.3999999999999999;
+       const real_t tmp_kernel_op_79 = tmp_kernel_op_77 + tmp_kernel_op_78 - 3.0;
+       const real_t tmp_kernel_op_115 = 2.3999999999999999;
+       const real_t tmp_kernel_op_116 = 0.80000000000000004;
+       const real_t tmp_kernel_op_117 = tmp_kernel_op_115 + tmp_kernel_op_116 - 3.0;
+       const real_t tmp_kernel_op_153 = 0.80000000000000004;
+       const real_t tmp_kernel_op_154 = 0.80000000000000004;
+       const real_t tmp_kernel_op_155 = tmp_kernel_op_153 + tmp_kernel_op_154 - 3.0;
+       const real_t tmp_kernel_op_217 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_36;
+       const real_t tmp_kernel_op_218 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_35;
+       const real_t tmp_kernel_op_220 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_36;
+       const real_t tmp_kernel_op_221 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_35;
+       const real_t tmp_kernel_op_225 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_78;
+       const real_t tmp_kernel_op_226 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_77;
+       const real_t tmp_kernel_op_228 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_78;
+       const real_t tmp_kernel_op_229 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_77;
+       const real_t tmp_kernel_op_233 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_116;
+       const real_t tmp_kernel_op_234 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_115;
+       const real_t tmp_kernel_op_236 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_116;
+       const real_t tmp_kernel_op_237 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_115;
+       const real_t tmp_kernel_op_241 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_154;
+       const real_t tmp_kernel_op_242 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_153;
+       const real_t tmp_kernel_op_244 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_154;
+       const real_t tmp_kernel_op_245 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_153;
+       const real_t tmp_kernel_op_250 = -tmp_kernel_op_35 + 1.3333333333333335;
+       const real_t tmp_kernel_op_255 = -tmp_kernel_op_77 - 0.79999999999999982;
+       const real_t tmp_kernel_op_260 = -tmp_kernel_op_115 + 2.3999999999999999;
+       const real_t tmp_kernel_op_265 = -tmp_kernel_op_153 + 2.3999999999999999;
+       const real_t tmp_kernel_op_271 = -tmp_kernel_op_36 + 1.3333333333333335;
+       const real_t tmp_kernel_op_276 = -tmp_kernel_op_78 + 2.3999999999999999;
+       const real_t tmp_kernel_op_281 = -tmp_kernel_op_116 - 0.79999999999999982;
+       const real_t tmp_kernel_op_286 = -tmp_kernel_op_154 + 2.3999999999999999;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d tmp_kernel_op_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_3 = _mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_4 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_5 = _mm256_mul_pd(tmp_kernel_op_4,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_6 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331)),_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331))),p_affine_0_0);
+                const __m256d tmp_kernel_op_7 = _mm256_mul_pd(tmp_kernel_op_6,tmp_kernel_op_6);
+                const __m256d tmp_kernel_op_8 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_9 = _mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_10 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_11 = _mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_12 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331)),_mm256_mul_pd(tmp_kernel_op_9,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331))),p_affine_0_1);
+                const __m256d tmp_kernel_op_13 = _mm256_mul_pd(tmp_kernel_op_12,tmp_kernel_op_12);
+                const __m256d tmp_kernel_op_14 = _mm256_add_pd(tmp_kernel_op_13,tmp_kernel_op_7);
+                const __m256d tmp_kernel_op_22 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_14)),_mm256_set_pd(tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21));
+                const __m256d tmp_kernel_op_23 = _mm256_mul_pd(tmp_kernel_op_22,tmp_kernel_op_6);
+                const __m256d tmp_kernel_op_24 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_14),_mm256_mul_pd(tmp_kernel_op_14,tmp_kernel_op_14));
+                const __m256d tmp_kernel_op_27 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26),tmp_kernel_op_6),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25),tmp_kernel_op_12),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17))),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_28 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_24,tmp_kernel_op_27),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_29 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_23,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(tmp_kernel_op_13,tmp_kernel_op_28));
+                const __m256d tmp_kernel_op_30 = _mm256_mul_pd(tmp_kernel_op_12,tmp_kernel_op_22);
+                const __m256d tmp_kernel_op_31 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_30,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_24,tmp_kernel_op_27),tmp_kernel_op_7),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_32 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_23,_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_12,tmp_kernel_op_24),tmp_kernel_op_27),tmp_kernel_op_6),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_33 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_30,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_12,tmp_kernel_op_28),tmp_kernel_op_6),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_34 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_29,tmp_kernel_op_31),_mm256_mul_pd(tmp_kernel_op_32,tmp_kernel_op_33)));
+                const __m256d tmp_kernel_op_38 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY),_mm256_set_pd(tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY),_mm256_set_pd(tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37))));
+                const __m256d tmp_kernel_op_39 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY),_mm256_set_pd(tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY),_mm256_set_pd(tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37))));
+                const __m256d tmp_kernel_op_40 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_29,tmp_kernel_op_38),_mm256_mul_pd(tmp_kernel_op_32,tmp_kernel_op_39));
+                const __m256d tmp_kernel_op_41 = _mm256_mul_pd(tmp_kernel_op_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_42 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_39),_mm256_mul_pd(tmp_kernel_op_38,tmp_kernel_op_41));
+                const __m256d tmp_kernel_op_43 = _mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_44 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331)),_mm256_mul_pd(tmp_kernel_op_4,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331))),tmp_kernel_op_43);
+                const __m256d tmp_kernel_op_45 = _mm256_mul_pd(tmp_kernel_op_44,tmp_kernel_op_44);
+                const __m256d tmp_kernel_op_46 = _mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_47 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331))),tmp_kernel_op_46);
+                const __m256d tmp_kernel_op_48 = _mm256_mul_pd(tmp_kernel_op_47,tmp_kernel_op_47);
+                const __m256d tmp_kernel_op_49 = _mm256_add_pd(tmp_kernel_op_45,tmp_kernel_op_48);
+                const __m256d tmp_kernel_op_52 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_49)),_mm256_set_pd(tmp_kernel_op_51,tmp_kernel_op_51,tmp_kernel_op_51,tmp_kernel_op_51));
+                const __m256d tmp_kernel_op_53 = _mm256_mul_pd(tmp_kernel_op_44,tmp_kernel_op_52);
+                const __m256d tmp_kernel_op_54 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_47),_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_44),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_55 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_49),_mm256_mul_pd(tmp_kernel_op_49,tmp_kernel_op_49)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_56 = _mm256_mul_pd(tmp_kernel_op_55,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_54,_mm256_set_pd(tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_57 = _mm256_mul_pd(tmp_kernel_op_47,tmp_kernel_op_52);
+                const __m256d tmp_kernel_op_58 = _mm256_mul_pd(tmp_kernel_op_55,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_54,_mm256_set_pd(tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_59 = _mm256_mul_pd(tmp_kernel_op_44,tmp_kernel_op_47);
+                const __m256d tmp_kernel_op_60 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(-0.28125,-0.28125,-0.28125,-0.28125),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_57,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(tmp_kernel_op_45,tmp_kernel_op_58)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_53,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_48,tmp_kernel_op_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_53,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_58,tmp_kernel_op_59),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_57,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_56,tmp_kernel_op_59))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_61 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),p_affine_0_0);
+                const __m256d tmp_kernel_op_62 = _mm256_mul_pd(tmp_kernel_op_61,tmp_kernel_op_61);
+                const __m256d tmp_kernel_op_63 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_9,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),p_affine_0_1);
+                const __m256d tmp_kernel_op_64 = _mm256_mul_pd(tmp_kernel_op_63,tmp_kernel_op_63);
+                const __m256d tmp_kernel_op_65 = _mm256_add_pd(tmp_kernel_op_62,tmp_kernel_op_64);
+                const __m256d tmp_kernel_op_66 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_65)),_mm256_set_pd(tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21));
+                const __m256d tmp_kernel_op_67 = _mm256_mul_pd(tmp_kernel_op_61,tmp_kernel_op_66);
+                const __m256d tmp_kernel_op_68 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_65),_mm256_mul_pd(tmp_kernel_op_65,tmp_kernel_op_65));
+                const __m256d tmp_kernel_op_69 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26),tmp_kernel_op_61),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25),tmp_kernel_op_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17))),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_70 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_68,tmp_kernel_op_69),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_71 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_67,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(tmp_kernel_op_64,tmp_kernel_op_70));
+                const __m256d tmp_kernel_op_72 = _mm256_mul_pd(tmp_kernel_op_63,tmp_kernel_op_66);
+                const __m256d tmp_kernel_op_73 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_72,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_62,tmp_kernel_op_68),tmp_kernel_op_69),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_74 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_67,_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_61,tmp_kernel_op_63),tmp_kernel_op_68),tmp_kernel_op_69),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_75 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_72,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_61,tmp_kernel_op_63),tmp_kernel_op_70),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_76 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_71,tmp_kernel_op_73),_mm256_mul_pd(tmp_kernel_op_74,tmp_kernel_op_75)));
+                const __m256d tmp_kernel_op_80 = _mm256_mul_pd(tmp_kernel_op_76,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY),_mm256_set_pd(tmp_kernel_op_79,tmp_kernel_op_79,tmp_kernel_op_79,tmp_kernel_op_79)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY),_mm256_set_pd(tmp_kernel_op_79,tmp_kernel_op_79,tmp_kernel_op_79,tmp_kernel_op_79))));
+                const __m256d tmp_kernel_op_81 = _mm256_mul_pd(tmp_kernel_op_76,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY),_mm256_set_pd(tmp_kernel_op_79,tmp_kernel_op_79,tmp_kernel_op_79,tmp_kernel_op_79)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY),_mm256_set_pd(tmp_kernel_op_79,tmp_kernel_op_79,tmp_kernel_op_79,tmp_kernel_op_79))));
+                const __m256d tmp_kernel_op_82 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_71,tmp_kernel_op_80),_mm256_mul_pd(tmp_kernel_op_74,tmp_kernel_op_81));
+                const __m256d tmp_kernel_op_83 = _mm256_mul_pd(tmp_kernel_op_75,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_84 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_73,tmp_kernel_op_81),_mm256_mul_pd(tmp_kernel_op_80,tmp_kernel_op_83));
+                const __m256d tmp_kernel_op_85 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_4,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),tmp_kernel_op_43);
+                const __m256d tmp_kernel_op_86 = _mm256_mul_pd(tmp_kernel_op_85,tmp_kernel_op_85);
+                const __m256d tmp_kernel_op_87 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),tmp_kernel_op_46);
+                const __m256d tmp_kernel_op_88 = _mm256_mul_pd(tmp_kernel_op_87,tmp_kernel_op_87);
+                const __m256d tmp_kernel_op_89 = _mm256_add_pd(tmp_kernel_op_86,tmp_kernel_op_88);
+                const __m256d tmp_kernel_op_90 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_89)),_mm256_set_pd(tmp_kernel_op_51,tmp_kernel_op_51,tmp_kernel_op_51,tmp_kernel_op_51));
+                const __m256d tmp_kernel_op_91 = _mm256_mul_pd(tmp_kernel_op_85,tmp_kernel_op_90);
+                const __m256d tmp_kernel_op_92 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_87),_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_93 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_89),_mm256_mul_pd(tmp_kernel_op_89,tmp_kernel_op_89)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_94 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_92,_mm256_set_pd(tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_95 = _mm256_mul_pd(tmp_kernel_op_87,tmp_kernel_op_90);
+                const __m256d tmp_kernel_op_96 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_92,_mm256_set_pd(tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_97 = _mm256_mul_pd(tmp_kernel_op_85,tmp_kernel_op_87);
+                const __m256d tmp_kernel_op_98 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.26041666666666669,0.26041666666666669,0.26041666666666669,0.26041666666666669),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_95,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(tmp_kernel_op_86,tmp_kernel_op_96)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_91,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_88,tmp_kernel_op_94),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_91,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_96,tmp_kernel_op_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_95,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_94,tmp_kernel_op_97))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_99 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),p_affine_0_0);
+                const __m256d tmp_kernel_op_100 = _mm256_mul_pd(tmp_kernel_op_99,tmp_kernel_op_99);
+                const __m256d tmp_kernel_op_101 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_9,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),p_affine_0_1);
+                const __m256d tmp_kernel_op_102 = _mm256_mul_pd(tmp_kernel_op_101,tmp_kernel_op_101);
+                const __m256d tmp_kernel_op_103 = _mm256_add_pd(tmp_kernel_op_100,tmp_kernel_op_102);
+                const __m256d tmp_kernel_op_104 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_103)),_mm256_set_pd(tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21));
+                const __m256d tmp_kernel_op_105 = _mm256_mul_pd(tmp_kernel_op_104,tmp_kernel_op_99);
+                const __m256d tmp_kernel_op_106 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_103),_mm256_mul_pd(tmp_kernel_op_103,tmp_kernel_op_103));
+                const __m256d tmp_kernel_op_107 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26),tmp_kernel_op_99),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25),tmp_kernel_op_101),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17))),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_108 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_106,tmp_kernel_op_107),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_109 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(tmp_kernel_op_102,tmp_kernel_op_108));
+                const __m256d tmp_kernel_op_110 = _mm256_mul_pd(tmp_kernel_op_101,tmp_kernel_op_104);
+                const __m256d tmp_kernel_op_111 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_106),tmp_kernel_op_107),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_112 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_101,tmp_kernel_op_106),tmp_kernel_op_107),tmp_kernel_op_99),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_113 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_110,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_101,tmp_kernel_op_108),tmp_kernel_op_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_114 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_109,tmp_kernel_op_111),_mm256_mul_pd(tmp_kernel_op_112,tmp_kernel_op_113)));
+                const __m256d tmp_kernel_op_118 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY),_mm256_set_pd(tmp_kernel_op_117,tmp_kernel_op_117,tmp_kernel_op_117,tmp_kernel_op_117)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY),_mm256_set_pd(tmp_kernel_op_117,tmp_kernel_op_117,tmp_kernel_op_117,tmp_kernel_op_117))));
+                const __m256d tmp_kernel_op_119 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY),_mm256_set_pd(tmp_kernel_op_117,tmp_kernel_op_117,tmp_kernel_op_117,tmp_kernel_op_117)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY),_mm256_set_pd(tmp_kernel_op_117,tmp_kernel_op_117,tmp_kernel_op_117,tmp_kernel_op_117))));
+                const __m256d tmp_kernel_op_120 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_109,tmp_kernel_op_118),_mm256_mul_pd(tmp_kernel_op_112,tmp_kernel_op_119));
+                const __m256d tmp_kernel_op_121 = _mm256_mul_pd(tmp_kernel_op_113,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_122 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_111,tmp_kernel_op_119),_mm256_mul_pd(tmp_kernel_op_118,tmp_kernel_op_121));
+                const __m256d tmp_kernel_op_123 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_4,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),tmp_kernel_op_43);
+                const __m256d tmp_kernel_op_124 = _mm256_mul_pd(tmp_kernel_op_123,tmp_kernel_op_123);
+                const __m256d tmp_kernel_op_125 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),tmp_kernel_op_46);
+                const __m256d tmp_kernel_op_126 = _mm256_mul_pd(tmp_kernel_op_125,tmp_kernel_op_125);
+                const __m256d tmp_kernel_op_127 = _mm256_add_pd(tmp_kernel_op_124,tmp_kernel_op_126);
+                const __m256d tmp_kernel_op_128 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_127)),_mm256_set_pd(tmp_kernel_op_51,tmp_kernel_op_51,tmp_kernel_op_51,tmp_kernel_op_51));
+                const __m256d tmp_kernel_op_129 = _mm256_mul_pd(tmp_kernel_op_123,tmp_kernel_op_128);
+                const __m256d tmp_kernel_op_130 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_125),_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_123),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_131 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_127),_mm256_mul_pd(tmp_kernel_op_127,tmp_kernel_op_127)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_132 = _mm256_mul_pd(tmp_kernel_op_131,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_set_pd(tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_133 = _mm256_mul_pd(tmp_kernel_op_125,tmp_kernel_op_128);
+                const __m256d tmp_kernel_op_134 = _mm256_mul_pd(tmp_kernel_op_131,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_set_pd(tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_135 = _mm256_mul_pd(tmp_kernel_op_123,tmp_kernel_op_125);
+                const __m256d tmp_kernel_op_136 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.26041666666666669,0.26041666666666669,0.26041666666666669,0.26041666666666669),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_133,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(tmp_kernel_op_124,tmp_kernel_op_134)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_129,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_126,tmp_kernel_op_132),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_129,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_134,tmp_kernel_op_135),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_133,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_132,tmp_kernel_op_135))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_137 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001))),p_affine_0_0);
+                const __m256d tmp_kernel_op_138 = _mm256_mul_pd(tmp_kernel_op_137,tmp_kernel_op_137);
+                const __m256d tmp_kernel_op_139 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_9,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001))),p_affine_0_1);
+                const __m256d tmp_kernel_op_140 = _mm256_mul_pd(tmp_kernel_op_139,tmp_kernel_op_139);
+                const __m256d tmp_kernel_op_141 = _mm256_add_pd(tmp_kernel_op_138,tmp_kernel_op_140);
+                const __m256d tmp_kernel_op_142 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_141)),_mm256_set_pd(tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21));
+                const __m256d tmp_kernel_op_143 = _mm256_mul_pd(tmp_kernel_op_137,tmp_kernel_op_142);
+                const __m256d tmp_kernel_op_144 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_141),_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_141));
+                const __m256d tmp_kernel_op_145 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26),tmp_kernel_op_137),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25),tmp_kernel_op_139),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17))),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_146 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_145),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_147 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(tmp_kernel_op_140,tmp_kernel_op_146));
+                const __m256d tmp_kernel_op_148 = _mm256_mul_pd(tmp_kernel_op_139,tmp_kernel_op_142);
+                const __m256d tmp_kernel_op_149 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_148,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_138,tmp_kernel_op_144),tmp_kernel_op_145),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_150 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_137,tmp_kernel_op_139),tmp_kernel_op_144),tmp_kernel_op_145),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_151 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_148,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_137,tmp_kernel_op_139),tmp_kernel_op_146),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_152 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_149),_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_151)));
+                const __m256d tmp_kernel_op_156 = _mm256_mul_pd(tmp_kernel_op_152,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY),_mm256_set_pd(tmp_kernel_op_155,tmp_kernel_op_155,tmp_kernel_op_155,tmp_kernel_op_155)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY),_mm256_set_pd(tmp_kernel_op_155,tmp_kernel_op_155,tmp_kernel_op_155,tmp_kernel_op_155))));
+                const __m256d tmp_kernel_op_157 = _mm256_mul_pd(tmp_kernel_op_152,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY),_mm256_set_pd(tmp_kernel_op_155,tmp_kernel_op_155,tmp_kernel_op_155,tmp_kernel_op_155)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY),_mm256_set_pd(tmp_kernel_op_155,tmp_kernel_op_155,tmp_kernel_op_155,tmp_kernel_op_155))));
+                const __m256d tmp_kernel_op_158 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_156),_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_157));
+                const __m256d tmp_kernel_op_159 = _mm256_mul_pd(tmp_kernel_op_151,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_160 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_149,tmp_kernel_op_157),_mm256_mul_pd(tmp_kernel_op_156,tmp_kernel_op_159));
+                const __m256d tmp_kernel_op_161 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_4,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001))),tmp_kernel_op_43);
+                const __m256d tmp_kernel_op_162 = _mm256_mul_pd(tmp_kernel_op_161,tmp_kernel_op_161);
+                const __m256d tmp_kernel_op_163 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001))),tmp_kernel_op_46);
+                const __m256d tmp_kernel_op_164 = _mm256_mul_pd(tmp_kernel_op_163,tmp_kernel_op_163);
+                const __m256d tmp_kernel_op_165 = _mm256_add_pd(tmp_kernel_op_162,tmp_kernel_op_164);
+                const __m256d tmp_kernel_op_166 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_165)),_mm256_set_pd(tmp_kernel_op_51,tmp_kernel_op_51,tmp_kernel_op_51,tmp_kernel_op_51));
+                const __m256d tmp_kernel_op_167 = _mm256_mul_pd(tmp_kernel_op_161,tmp_kernel_op_166);
+                const __m256d tmp_kernel_op_168 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_163),_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_161),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_169 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_165),_mm256_mul_pd(tmp_kernel_op_165,tmp_kernel_op_165)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_170 = _mm256_mul_pd(tmp_kernel_op_169,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_168,_mm256_set_pd(tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_171 = _mm256_mul_pd(tmp_kernel_op_163,tmp_kernel_op_166);
+                const __m256d tmp_kernel_op_172 = _mm256_mul_pd(tmp_kernel_op_169,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_168,_mm256_set_pd(tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_173 = _mm256_mul_pd(tmp_kernel_op_161,tmp_kernel_op_163);
+                const __m256d tmp_kernel_op_174 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.26041666666666669,0.26041666666666669,0.26041666666666669,0.26041666666666669),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_171,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(tmp_kernel_op_162,tmp_kernel_op_172)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_167,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_164,tmp_kernel_op_170),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_167,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_172,tmp_kernel_op_173),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_171,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_170,tmp_kernel_op_173))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_175 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_35,tmp_kernel_op_35,tmp_kernel_op_35,tmp_kernel_op_35)));
+                const __m256d tmp_kernel_op_176 = _mm256_mul_pd(tmp_kernel_op_175,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                const __m256d tmp_kernel_op_177 = _mm256_mul_pd(tmp_kernel_op_175,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                const __m256d tmp_kernel_op_178 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_176,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_177,tmp_kernel_op_32));
+                const __m256d tmp_kernel_op_179 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_176,tmp_kernel_op_41),_mm256_mul_pd(tmp_kernel_op_177,tmp_kernel_op_31));
+                const __m256d tmp_kernel_op_180 = _mm256_mul_pd(tmp_kernel_op_76,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_77,tmp_kernel_op_77,tmp_kernel_op_77,tmp_kernel_op_77)));
+                const __m256d tmp_kernel_op_181 = _mm256_mul_pd(tmp_kernel_op_180,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                const __m256d tmp_kernel_op_182 = _mm256_mul_pd(tmp_kernel_op_180,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                const __m256d tmp_kernel_op_183 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_181,tmp_kernel_op_71),_mm256_mul_pd(tmp_kernel_op_182,tmp_kernel_op_74));
+                const __m256d tmp_kernel_op_184 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_181,tmp_kernel_op_83),_mm256_mul_pd(tmp_kernel_op_182,tmp_kernel_op_73));
+                const __m256d tmp_kernel_op_185 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_115,tmp_kernel_op_115,tmp_kernel_op_115,tmp_kernel_op_115)));
+                const __m256d tmp_kernel_op_186 = _mm256_mul_pd(tmp_kernel_op_185,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                const __m256d tmp_kernel_op_187 = _mm256_mul_pd(tmp_kernel_op_185,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                const __m256d tmp_kernel_op_188 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_109,tmp_kernel_op_186),_mm256_mul_pd(tmp_kernel_op_112,tmp_kernel_op_187));
+                const __m256d tmp_kernel_op_189 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_111,tmp_kernel_op_187),_mm256_mul_pd(tmp_kernel_op_121,tmp_kernel_op_186));
+                const __m256d tmp_kernel_op_190 = _mm256_mul_pd(tmp_kernel_op_152,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_153,tmp_kernel_op_153,tmp_kernel_op_153,tmp_kernel_op_153)));
+                const __m256d tmp_kernel_op_191 = _mm256_mul_pd(tmp_kernel_op_190,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                const __m256d tmp_kernel_op_192 = _mm256_mul_pd(tmp_kernel_op_190,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                const __m256d tmp_kernel_op_193 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_191),_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_192));
+                const __m256d tmp_kernel_op_194 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_149,tmp_kernel_op_192),_mm256_mul_pd(tmp_kernel_op_159,tmp_kernel_op_191));
+                const __m256d tmp_kernel_op_195 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_120,tmp_kernel_op_188),_mm256_mul_pd(tmp_kernel_op_122,tmp_kernel_op_189))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_158,tmp_kernel_op_193),_mm256_mul_pd(tmp_kernel_op_160,tmp_kernel_op_194)))),_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_178,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_179,tmp_kernel_op_42)))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_183,tmp_kernel_op_82),_mm256_mul_pd(tmp_kernel_op_184,tmp_kernel_op_84))));
+                const __m256d tmp_kernel_op_196 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_36,tmp_kernel_op_36,tmp_kernel_op_36,tmp_kernel_op_36)));
+                const __m256d tmp_kernel_op_197 = _mm256_mul_pd(tmp_kernel_op_196,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                const __m256d tmp_kernel_op_198 = _mm256_mul_pd(tmp_kernel_op_196,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                const __m256d tmp_kernel_op_199 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_197,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_198,tmp_kernel_op_32));
+                const __m256d tmp_kernel_op_200 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_197,tmp_kernel_op_41),_mm256_mul_pd(tmp_kernel_op_198,tmp_kernel_op_31));
+                const __m256d tmp_kernel_op_201 = _mm256_mul_pd(tmp_kernel_op_76,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_78,tmp_kernel_op_78,tmp_kernel_op_78,tmp_kernel_op_78)));
+                const __m256d tmp_kernel_op_202 = _mm256_mul_pd(tmp_kernel_op_201,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                const __m256d tmp_kernel_op_203 = _mm256_mul_pd(tmp_kernel_op_201,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                const __m256d tmp_kernel_op_204 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_202,tmp_kernel_op_71),_mm256_mul_pd(tmp_kernel_op_203,tmp_kernel_op_74));
+                const __m256d tmp_kernel_op_205 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_202,tmp_kernel_op_83),_mm256_mul_pd(tmp_kernel_op_203,tmp_kernel_op_73));
+                const __m256d tmp_kernel_op_206 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_116,tmp_kernel_op_116,tmp_kernel_op_116,tmp_kernel_op_116)));
+                const __m256d tmp_kernel_op_207 = _mm256_mul_pd(tmp_kernel_op_206,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                const __m256d tmp_kernel_op_208 = _mm256_mul_pd(tmp_kernel_op_206,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                const __m256d tmp_kernel_op_209 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_109,tmp_kernel_op_207),_mm256_mul_pd(tmp_kernel_op_112,tmp_kernel_op_208));
+                const __m256d tmp_kernel_op_210 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_111,tmp_kernel_op_208),_mm256_mul_pd(tmp_kernel_op_121,tmp_kernel_op_207));
+                const __m256d tmp_kernel_op_211 = _mm256_mul_pd(tmp_kernel_op_152,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_154,tmp_kernel_op_154,tmp_kernel_op_154,tmp_kernel_op_154)));
+                const __m256d tmp_kernel_op_212 = _mm256_mul_pd(tmp_kernel_op_211,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                const __m256d tmp_kernel_op_213 = _mm256_mul_pd(tmp_kernel_op_211,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                const __m256d tmp_kernel_op_214 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_212),_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_213));
+                const __m256d tmp_kernel_op_215 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_149,tmp_kernel_op_213),_mm256_mul_pd(tmp_kernel_op_159,tmp_kernel_op_212));
+                const __m256d tmp_kernel_op_216 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_120,tmp_kernel_op_209),_mm256_mul_pd(tmp_kernel_op_122,tmp_kernel_op_210))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_158,tmp_kernel_op_214),_mm256_mul_pd(tmp_kernel_op_160,tmp_kernel_op_215)))),_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_199,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_200,tmp_kernel_op_42)))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_204,tmp_kernel_op_82),_mm256_mul_pd(tmp_kernel_op_205,tmp_kernel_op_84))));
+                const __m256d tmp_kernel_op_219 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_217,tmp_kernel_op_217,tmp_kernel_op_217,tmp_kernel_op_217),_mm256_set_pd(tmp_kernel_op_218,tmp_kernel_op_218,tmp_kernel_op_218,tmp_kernel_op_218)));
+                const __m256d tmp_kernel_op_222 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_220,tmp_kernel_op_220,tmp_kernel_op_220,tmp_kernel_op_220),_mm256_set_pd(tmp_kernel_op_221,tmp_kernel_op_221,tmp_kernel_op_221,tmp_kernel_op_221)));
+                const __m256d tmp_kernel_op_223 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_219,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_222,tmp_kernel_op_32));
+                const __m256d tmp_kernel_op_224 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_219,tmp_kernel_op_41),_mm256_mul_pd(tmp_kernel_op_222,tmp_kernel_op_31));
+                const __m256d tmp_kernel_op_227 = _mm256_mul_pd(tmp_kernel_op_76,_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_225,tmp_kernel_op_225,tmp_kernel_op_225,tmp_kernel_op_225),_mm256_set_pd(tmp_kernel_op_226,tmp_kernel_op_226,tmp_kernel_op_226,tmp_kernel_op_226)));
+                const __m256d tmp_kernel_op_230 = _mm256_mul_pd(tmp_kernel_op_76,_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_228,tmp_kernel_op_228,tmp_kernel_op_228,tmp_kernel_op_228),_mm256_set_pd(tmp_kernel_op_229,tmp_kernel_op_229,tmp_kernel_op_229,tmp_kernel_op_229)));
+                const __m256d tmp_kernel_op_231 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_227,tmp_kernel_op_71),_mm256_mul_pd(tmp_kernel_op_230,tmp_kernel_op_74));
+                const __m256d tmp_kernel_op_232 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_227,tmp_kernel_op_83),_mm256_mul_pd(tmp_kernel_op_230,tmp_kernel_op_73));
+                const __m256d tmp_kernel_op_235 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_233,tmp_kernel_op_233,tmp_kernel_op_233,tmp_kernel_op_233),_mm256_set_pd(tmp_kernel_op_234,tmp_kernel_op_234,tmp_kernel_op_234,tmp_kernel_op_234)));
+                const __m256d tmp_kernel_op_238 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_236,tmp_kernel_op_236,tmp_kernel_op_236,tmp_kernel_op_236),_mm256_set_pd(tmp_kernel_op_237,tmp_kernel_op_237,tmp_kernel_op_237,tmp_kernel_op_237)));
+                const __m256d tmp_kernel_op_239 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_109,tmp_kernel_op_235),_mm256_mul_pd(tmp_kernel_op_112,tmp_kernel_op_238));
+                const __m256d tmp_kernel_op_240 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_111,tmp_kernel_op_238),_mm256_mul_pd(tmp_kernel_op_121,tmp_kernel_op_235));
+                const __m256d tmp_kernel_op_243 = _mm256_mul_pd(tmp_kernel_op_152,_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_241,tmp_kernel_op_241,tmp_kernel_op_241,tmp_kernel_op_241),_mm256_set_pd(tmp_kernel_op_242,tmp_kernel_op_242,tmp_kernel_op_242,tmp_kernel_op_242)));
+                const __m256d tmp_kernel_op_246 = _mm256_mul_pd(tmp_kernel_op_152,_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_244,tmp_kernel_op_244,tmp_kernel_op_244,tmp_kernel_op_244),_mm256_set_pd(tmp_kernel_op_245,tmp_kernel_op_245,tmp_kernel_op_245,tmp_kernel_op_245)));
+                const __m256d tmp_kernel_op_247 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_243),_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_246));
+                const __m256d tmp_kernel_op_248 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_149,tmp_kernel_op_246),_mm256_mul_pd(tmp_kernel_op_159,tmp_kernel_op_243));
+                const __m256d tmp_kernel_op_249 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_120,tmp_kernel_op_239),_mm256_mul_pd(tmp_kernel_op_122,tmp_kernel_op_240))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_158,tmp_kernel_op_247),_mm256_mul_pd(tmp_kernel_op_160,tmp_kernel_op_248)))),_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_223,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_224,tmp_kernel_op_42)))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_231,tmp_kernel_op_82),_mm256_mul_pd(tmp_kernel_op_232,tmp_kernel_op_84))));
+                const __m256d tmp_kernel_op_251 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_217,tmp_kernel_op_217,tmp_kernel_op_217,tmp_kernel_op_217)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY),_mm256_set_pd(tmp_kernel_op_250,tmp_kernel_op_250,tmp_kernel_op_250,tmp_kernel_op_250))));
+                const __m256d tmp_kernel_op_252 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_220,tmp_kernel_op_220,tmp_kernel_op_220,tmp_kernel_op_220)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY),_mm256_set_pd(tmp_kernel_op_250,tmp_kernel_op_250,tmp_kernel_op_250,tmp_kernel_op_250))));
+                const __m256d tmp_kernel_op_253 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_251,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_252,tmp_kernel_op_32));
+                const __m256d tmp_kernel_op_254 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_251,tmp_kernel_op_41),_mm256_mul_pd(tmp_kernel_op_252,tmp_kernel_op_31));
+                const __m256d tmp_kernel_op_256 = _mm256_mul_pd(tmp_kernel_op_76,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_225,tmp_kernel_op_225,tmp_kernel_op_225,tmp_kernel_op_225)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY),_mm256_set_pd(tmp_kernel_op_255,tmp_kernel_op_255,tmp_kernel_op_255,tmp_kernel_op_255))));
+                const __m256d tmp_kernel_op_257 = _mm256_mul_pd(tmp_kernel_op_76,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_228,tmp_kernel_op_228,tmp_kernel_op_228,tmp_kernel_op_228)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY),_mm256_set_pd(tmp_kernel_op_255,tmp_kernel_op_255,tmp_kernel_op_255,tmp_kernel_op_255))));
+                const __m256d tmp_kernel_op_258 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_256,tmp_kernel_op_71),_mm256_mul_pd(tmp_kernel_op_257,tmp_kernel_op_74));
+                const __m256d tmp_kernel_op_259 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_256,tmp_kernel_op_83),_mm256_mul_pd(tmp_kernel_op_257,tmp_kernel_op_73));
+                const __m256d tmp_kernel_op_261 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_233,tmp_kernel_op_233,tmp_kernel_op_233,tmp_kernel_op_233)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY),_mm256_set_pd(tmp_kernel_op_260,tmp_kernel_op_260,tmp_kernel_op_260,tmp_kernel_op_260))));
+                const __m256d tmp_kernel_op_262 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_236,tmp_kernel_op_236,tmp_kernel_op_236,tmp_kernel_op_236)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY),_mm256_set_pd(tmp_kernel_op_260,tmp_kernel_op_260,tmp_kernel_op_260,tmp_kernel_op_260))));
+                const __m256d tmp_kernel_op_263 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_109,tmp_kernel_op_261),_mm256_mul_pd(tmp_kernel_op_112,tmp_kernel_op_262));
+                const __m256d tmp_kernel_op_264 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_111,tmp_kernel_op_262),_mm256_mul_pd(tmp_kernel_op_121,tmp_kernel_op_261));
+                const __m256d tmp_kernel_op_266 = _mm256_mul_pd(tmp_kernel_op_152,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_241,tmp_kernel_op_241,tmp_kernel_op_241,tmp_kernel_op_241)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY),_mm256_set_pd(tmp_kernel_op_265,tmp_kernel_op_265,tmp_kernel_op_265,tmp_kernel_op_265))));
+                const __m256d tmp_kernel_op_267 = _mm256_mul_pd(tmp_kernel_op_152,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_244,tmp_kernel_op_244,tmp_kernel_op_244,tmp_kernel_op_244)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY),_mm256_set_pd(tmp_kernel_op_265,tmp_kernel_op_265,tmp_kernel_op_265,tmp_kernel_op_265))));
+                const __m256d tmp_kernel_op_268 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_266),_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_267));
+                const __m256d tmp_kernel_op_269 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_149,tmp_kernel_op_267),_mm256_mul_pd(tmp_kernel_op_159,tmp_kernel_op_266));
+                const __m256d tmp_kernel_op_270 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_120,tmp_kernel_op_263),_mm256_mul_pd(tmp_kernel_op_122,tmp_kernel_op_264))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_158,tmp_kernel_op_268),_mm256_mul_pd(tmp_kernel_op_160,tmp_kernel_op_269)))),_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_253,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_254,tmp_kernel_op_42)))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_258,tmp_kernel_op_82),_mm256_mul_pd(tmp_kernel_op_259,tmp_kernel_op_84))));
+                const __m256d tmp_kernel_op_272 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_218,tmp_kernel_op_218,tmp_kernel_op_218,tmp_kernel_op_218)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY),_mm256_set_pd(tmp_kernel_op_271,tmp_kernel_op_271,tmp_kernel_op_271,tmp_kernel_op_271))));
+                const __m256d tmp_kernel_op_273 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_221,tmp_kernel_op_221,tmp_kernel_op_221,tmp_kernel_op_221)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY),_mm256_set_pd(tmp_kernel_op_271,tmp_kernel_op_271,tmp_kernel_op_271,tmp_kernel_op_271))));
+                const __m256d tmp_kernel_op_274 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_272,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_273,tmp_kernel_op_32));
+                const __m256d tmp_kernel_op_275 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_272,tmp_kernel_op_41),_mm256_mul_pd(tmp_kernel_op_273,tmp_kernel_op_31));
+                const __m256d tmp_kernel_op_277 = _mm256_mul_pd(tmp_kernel_op_76,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_226,tmp_kernel_op_226,tmp_kernel_op_226,tmp_kernel_op_226)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY),_mm256_set_pd(tmp_kernel_op_276,tmp_kernel_op_276,tmp_kernel_op_276,tmp_kernel_op_276))));
+                const __m256d tmp_kernel_op_278 = _mm256_mul_pd(tmp_kernel_op_76,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_229,tmp_kernel_op_229,tmp_kernel_op_229,tmp_kernel_op_229)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY),_mm256_set_pd(tmp_kernel_op_276,tmp_kernel_op_276,tmp_kernel_op_276,tmp_kernel_op_276))));
+                const __m256d tmp_kernel_op_279 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_277,tmp_kernel_op_71),_mm256_mul_pd(tmp_kernel_op_278,tmp_kernel_op_74));
+                const __m256d tmp_kernel_op_280 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_277,tmp_kernel_op_83),_mm256_mul_pd(tmp_kernel_op_278,tmp_kernel_op_73));
+                const __m256d tmp_kernel_op_282 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_234,tmp_kernel_op_234,tmp_kernel_op_234,tmp_kernel_op_234)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY),_mm256_set_pd(tmp_kernel_op_281,tmp_kernel_op_281,tmp_kernel_op_281,tmp_kernel_op_281))));
+                const __m256d tmp_kernel_op_283 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_237,tmp_kernel_op_237,tmp_kernel_op_237,tmp_kernel_op_237)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY),_mm256_set_pd(tmp_kernel_op_281,tmp_kernel_op_281,tmp_kernel_op_281,tmp_kernel_op_281))));
+                const __m256d tmp_kernel_op_284 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_109,tmp_kernel_op_282),_mm256_mul_pd(tmp_kernel_op_112,tmp_kernel_op_283));
+                const __m256d tmp_kernel_op_285 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_111,tmp_kernel_op_283),_mm256_mul_pd(tmp_kernel_op_121,tmp_kernel_op_282));
+                const __m256d tmp_kernel_op_287 = _mm256_mul_pd(tmp_kernel_op_152,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_242,tmp_kernel_op_242,tmp_kernel_op_242,tmp_kernel_op_242)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY),_mm256_set_pd(tmp_kernel_op_286,tmp_kernel_op_286,tmp_kernel_op_286,tmp_kernel_op_286))));
+                const __m256d tmp_kernel_op_288 = _mm256_mul_pd(tmp_kernel_op_152,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_245,tmp_kernel_op_245,tmp_kernel_op_245,tmp_kernel_op_245)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY),_mm256_set_pd(tmp_kernel_op_286,tmp_kernel_op_286,tmp_kernel_op_286,tmp_kernel_op_286))));
+                const __m256d tmp_kernel_op_289 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_287),_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_288));
+                const __m256d tmp_kernel_op_290 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_149,tmp_kernel_op_288),_mm256_mul_pd(tmp_kernel_op_159,tmp_kernel_op_287));
+                const __m256d tmp_kernel_op_291 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_120,tmp_kernel_op_284),_mm256_mul_pd(tmp_kernel_op_122,tmp_kernel_op_285))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_158,tmp_kernel_op_289),_mm256_mul_pd(tmp_kernel_op_160,tmp_kernel_op_290)))),_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_274,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_275,tmp_kernel_op_42)))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_279,tmp_kernel_op_82),_mm256_mul_pd(tmp_kernel_op_280,tmp_kernel_op_84))));
+                const __m256d tmp_kernel_op_292 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_178,tmp_kernel_op_199),_mm256_mul_pd(tmp_kernel_op_179,tmp_kernel_op_200))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_183,tmp_kernel_op_204),_mm256_mul_pd(tmp_kernel_op_184,tmp_kernel_op_205)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_188,tmp_kernel_op_209),_mm256_mul_pd(tmp_kernel_op_189,tmp_kernel_op_210)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_193,tmp_kernel_op_214),_mm256_mul_pd(tmp_kernel_op_194,tmp_kernel_op_215))));
+                const __m256d tmp_kernel_op_293 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_178,tmp_kernel_op_223),_mm256_mul_pd(tmp_kernel_op_179,tmp_kernel_op_224))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_183,tmp_kernel_op_231),_mm256_mul_pd(tmp_kernel_op_184,tmp_kernel_op_232)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_188,tmp_kernel_op_239),_mm256_mul_pd(tmp_kernel_op_189,tmp_kernel_op_240)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_193,tmp_kernel_op_247),_mm256_mul_pd(tmp_kernel_op_194,tmp_kernel_op_248))));
+                const __m256d tmp_kernel_op_294 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_178,tmp_kernel_op_253),_mm256_mul_pd(tmp_kernel_op_179,tmp_kernel_op_254))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_183,tmp_kernel_op_258),_mm256_mul_pd(tmp_kernel_op_184,tmp_kernel_op_259)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_188,tmp_kernel_op_263),_mm256_mul_pd(tmp_kernel_op_189,tmp_kernel_op_264)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_193,tmp_kernel_op_268),_mm256_mul_pd(tmp_kernel_op_194,tmp_kernel_op_269))));
+                const __m256d tmp_kernel_op_295 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_178,tmp_kernel_op_274),_mm256_mul_pd(tmp_kernel_op_179,tmp_kernel_op_275))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_183,tmp_kernel_op_279),_mm256_mul_pd(tmp_kernel_op_184,tmp_kernel_op_280)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_188,tmp_kernel_op_284),_mm256_mul_pd(tmp_kernel_op_189,tmp_kernel_op_285)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_193,tmp_kernel_op_289),_mm256_mul_pd(tmp_kernel_op_194,tmp_kernel_op_290))));
+                const __m256d tmp_kernel_op_296 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_199,tmp_kernel_op_223),_mm256_mul_pd(tmp_kernel_op_200,tmp_kernel_op_224))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_204,tmp_kernel_op_231),_mm256_mul_pd(tmp_kernel_op_205,tmp_kernel_op_232)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_209,tmp_kernel_op_239),_mm256_mul_pd(tmp_kernel_op_210,tmp_kernel_op_240)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_214,tmp_kernel_op_247),_mm256_mul_pd(tmp_kernel_op_215,tmp_kernel_op_248))));
+                const __m256d tmp_kernel_op_297 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_199,tmp_kernel_op_253),_mm256_mul_pd(tmp_kernel_op_200,tmp_kernel_op_254))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_204,tmp_kernel_op_258),_mm256_mul_pd(tmp_kernel_op_205,tmp_kernel_op_259)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_209,tmp_kernel_op_263),_mm256_mul_pd(tmp_kernel_op_210,tmp_kernel_op_264)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_214,tmp_kernel_op_268),_mm256_mul_pd(tmp_kernel_op_215,tmp_kernel_op_269))));
+                const __m256d tmp_kernel_op_298 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_199,tmp_kernel_op_274),_mm256_mul_pd(tmp_kernel_op_200,tmp_kernel_op_275))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_204,tmp_kernel_op_279),_mm256_mul_pd(tmp_kernel_op_205,tmp_kernel_op_280)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_209,tmp_kernel_op_284),_mm256_mul_pd(tmp_kernel_op_210,tmp_kernel_op_285)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_214,tmp_kernel_op_289),_mm256_mul_pd(tmp_kernel_op_215,tmp_kernel_op_290))));
+                const __m256d tmp_kernel_op_299 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_223,tmp_kernel_op_253),_mm256_mul_pd(tmp_kernel_op_224,tmp_kernel_op_254))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_231,tmp_kernel_op_258),_mm256_mul_pd(tmp_kernel_op_232,tmp_kernel_op_259)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_239,tmp_kernel_op_263),_mm256_mul_pd(tmp_kernel_op_240,tmp_kernel_op_264)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_247,tmp_kernel_op_268),_mm256_mul_pd(tmp_kernel_op_248,tmp_kernel_op_269))));
+                const __m256d tmp_kernel_op_300 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_223,tmp_kernel_op_274),_mm256_mul_pd(tmp_kernel_op_224,tmp_kernel_op_275))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_231,tmp_kernel_op_279),_mm256_mul_pd(tmp_kernel_op_232,tmp_kernel_op_280)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_239,tmp_kernel_op_284),_mm256_mul_pd(tmp_kernel_op_240,tmp_kernel_op_285)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_247,tmp_kernel_op_289),_mm256_mul_pd(tmp_kernel_op_248,tmp_kernel_op_290))));
+                const __m256d tmp_kernel_op_301 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_253,tmp_kernel_op_274),_mm256_mul_pd(tmp_kernel_op_254,tmp_kernel_op_275))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_258,tmp_kernel_op_279),_mm256_mul_pd(tmp_kernel_op_259,tmp_kernel_op_280)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_263,tmp_kernel_op_284),_mm256_mul_pd(tmp_kernel_op_264,tmp_kernel_op_285)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_268,tmp_kernel_op_289),_mm256_mul_pd(tmp_kernel_op_269,tmp_kernel_op_290))));
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_120,tmp_kernel_op_120),_mm256_mul_pd(tmp_kernel_op_122,tmp_kernel_op_122))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_158,tmp_kernel_op_158),_mm256_mul_pd(tmp_kernel_op_160,tmp_kernel_op_160)))),_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_40,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_42,tmp_kernel_op_42)))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_82,tmp_kernel_op_82),_mm256_mul_pd(tmp_kernel_op_84,tmp_kernel_op_84))))),_mm256_mul_pd(src_dof_1,tmp_kernel_op_195)),_mm256_mul_pd(src_dof_2,tmp_kernel_op_216)),_mm256_mul_pd(src_dof_3,tmp_kernel_op_249)),_mm256_mul_pd(src_dof_4,tmp_kernel_op_270)),_mm256_mul_pd(src_dof_5,tmp_kernel_op_291));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_178,tmp_kernel_op_178),_mm256_mul_pd(tmp_kernel_op_179,tmp_kernel_op_179))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_183,tmp_kernel_op_183),_mm256_mul_pd(tmp_kernel_op_184,tmp_kernel_op_184)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_188,tmp_kernel_op_188),_mm256_mul_pd(tmp_kernel_op_189,tmp_kernel_op_189)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_193,tmp_kernel_op_193),_mm256_mul_pd(tmp_kernel_op_194,tmp_kernel_op_194))))),_mm256_mul_pd(src_dof_0,tmp_kernel_op_195)),_mm256_mul_pd(src_dof_2,tmp_kernel_op_292)),_mm256_mul_pd(src_dof_3,tmp_kernel_op_293)),_mm256_mul_pd(src_dof_4,tmp_kernel_op_294)),_mm256_mul_pd(src_dof_5,tmp_kernel_op_295));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_199,tmp_kernel_op_199),_mm256_mul_pd(tmp_kernel_op_200,tmp_kernel_op_200))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_204,tmp_kernel_op_204),_mm256_mul_pd(tmp_kernel_op_205,tmp_kernel_op_205)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_209,tmp_kernel_op_209),_mm256_mul_pd(tmp_kernel_op_210,tmp_kernel_op_210)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_214,tmp_kernel_op_214),_mm256_mul_pd(tmp_kernel_op_215,tmp_kernel_op_215))))),_mm256_mul_pd(src_dof_0,tmp_kernel_op_216)),_mm256_mul_pd(src_dof_1,tmp_kernel_op_292)),_mm256_mul_pd(src_dof_3,tmp_kernel_op_296)),_mm256_mul_pd(src_dof_4,tmp_kernel_op_297)),_mm256_mul_pd(src_dof_5,tmp_kernel_op_298));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_3,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_223,tmp_kernel_op_223),_mm256_mul_pd(tmp_kernel_op_224,tmp_kernel_op_224))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_231,tmp_kernel_op_231),_mm256_mul_pd(tmp_kernel_op_232,tmp_kernel_op_232)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_239,tmp_kernel_op_239),_mm256_mul_pd(tmp_kernel_op_240,tmp_kernel_op_240)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_247,tmp_kernel_op_247),_mm256_mul_pd(tmp_kernel_op_248,tmp_kernel_op_248))))),_mm256_mul_pd(src_dof_0,tmp_kernel_op_249)),_mm256_mul_pd(src_dof_1,tmp_kernel_op_293)),_mm256_mul_pd(src_dof_2,tmp_kernel_op_296)),_mm256_mul_pd(src_dof_4,tmp_kernel_op_299)),_mm256_mul_pd(src_dof_5,tmp_kernel_op_300));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_253,tmp_kernel_op_253),_mm256_mul_pd(tmp_kernel_op_254,tmp_kernel_op_254))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_258,tmp_kernel_op_258),_mm256_mul_pd(tmp_kernel_op_259,tmp_kernel_op_259)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_263,tmp_kernel_op_263),_mm256_mul_pd(tmp_kernel_op_264,tmp_kernel_op_264)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_268,tmp_kernel_op_268),_mm256_mul_pd(tmp_kernel_op_269,tmp_kernel_op_269))))),_mm256_mul_pd(src_dof_0,tmp_kernel_op_270)),_mm256_mul_pd(src_dof_1,tmp_kernel_op_294)),_mm256_mul_pd(src_dof_2,tmp_kernel_op_297)),_mm256_mul_pd(src_dof_3,tmp_kernel_op_299)),_mm256_mul_pd(src_dof_5,tmp_kernel_op_301));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_274,tmp_kernel_op_274),_mm256_mul_pd(tmp_kernel_op_275,tmp_kernel_op_275))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_279,tmp_kernel_op_279),_mm256_mul_pd(tmp_kernel_op_280,tmp_kernel_op_280)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_284,tmp_kernel_op_284),_mm256_mul_pd(tmp_kernel_op_285,tmp_kernel_op_285)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_289,tmp_kernel_op_289),_mm256_mul_pd(tmp_kernel_op_290,tmp_kernel_op_290))))),_mm256_mul_pd(src_dof_0,tmp_kernel_op_291)),_mm256_mul_pd(src_dof_1,tmp_kernel_op_295)),_mm256_mul_pd(src_dof_2,tmp_kernel_op_298)),_mm256_mul_pd(src_dof_3,tmp_kernel_op_300)),_mm256_mul_pd(src_dof_4,tmp_kernel_op_301));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t tmp_kernel_op_2 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_kernel_op_3 = -tmp_kernel_op_2;
+                const real_t tmp_kernel_op_4 = p_affine_0_0 - p_affine_2_0;
+                const real_t tmp_kernel_op_5 = -tmp_kernel_op_4;
+                const real_t tmp_kernel_op_6 = p_affine_0_0 + tmp_kernel_op_3*0.33333333333333331 + tmp_kernel_op_5*0.33333333333333331;
+                const real_t tmp_kernel_op_7 = (tmp_kernel_op_6*tmp_kernel_op_6);
+                const real_t tmp_kernel_op_8 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_kernel_op_9 = -tmp_kernel_op_8;
+                const real_t tmp_kernel_op_10 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_kernel_op_11 = -tmp_kernel_op_10;
+                const real_t tmp_kernel_op_12 = p_affine_0_1 + tmp_kernel_op_11*0.33333333333333331 + tmp_kernel_op_9*0.33333333333333331;
+                const real_t tmp_kernel_op_13 = (tmp_kernel_op_12*tmp_kernel_op_12);
+                const real_t tmp_kernel_op_14 = tmp_kernel_op_13 + tmp_kernel_op_7;
+                const real_t tmp_kernel_op_22 = pow(tmp_kernel_op_14, -0.50000000000000000)*tmp_kernel_op_21;
+                const real_t tmp_kernel_op_23 = tmp_kernel_op_22*tmp_kernel_op_6;
+                const real_t tmp_kernel_op_24 = pow(tmp_kernel_op_14, -1.5000000000000000);
+                const real_t tmp_kernel_op_27 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_6) - tmp_kernel_op_17*(tmp_kernel_op_12 + tmp_kernel_op_25));
+                const real_t tmp_kernel_op_28 = tmp_kernel_op_24*tmp_kernel_op_27*1.0;
+                const real_t tmp_kernel_op_29 = tmp_kernel_op_1*tmp_kernel_op_23 + tmp_kernel_op_13*tmp_kernel_op_28;
+                const real_t tmp_kernel_op_30 = tmp_kernel_op_12*tmp_kernel_op_22;
+                const real_t tmp_kernel_op_31 = -tmp_kernel_op_17*tmp_kernel_op_30 + tmp_kernel_op_24*tmp_kernel_op_27*tmp_kernel_op_7*1.0;
+                const real_t tmp_kernel_op_32 = tmp_kernel_op_12*tmp_kernel_op_24*tmp_kernel_op_27*tmp_kernel_op_6*1.0 + tmp_kernel_op_17*tmp_kernel_op_23;
+                const real_t tmp_kernel_op_33 = tmp_kernel_op_1*tmp_kernel_op_30 - tmp_kernel_op_12*tmp_kernel_op_28*tmp_kernel_op_6;
+                const real_t tmp_kernel_op_34 = 1.0 / (tmp_kernel_op_29*tmp_kernel_op_31 + tmp_kernel_op_32*tmp_kernel_op_33);
+                const real_t tmp_kernel_op_38 = tmp_kernel_op_34*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_37 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_37);
+                const real_t tmp_kernel_op_39 = tmp_kernel_op_34*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_37 + jac_affine_inv_1_0_GRAY*tmp_kernel_op_37);
+                const real_t tmp_kernel_op_40 = tmp_kernel_op_29*tmp_kernel_op_38 + tmp_kernel_op_32*tmp_kernel_op_39;
+                const real_t tmp_kernel_op_41 = -tmp_kernel_op_33;
+                const real_t tmp_kernel_op_42 = tmp_kernel_op_31*tmp_kernel_op_39 + tmp_kernel_op_38*tmp_kernel_op_41;
+                const real_t tmp_kernel_op_43 = -p_affine_0_0;
+                const real_t tmp_kernel_op_44 = tmp_kernel_op_2*0.33333333333333331 + tmp_kernel_op_4*0.33333333333333331 + tmp_kernel_op_43;
+                const real_t tmp_kernel_op_45 = (tmp_kernel_op_44*tmp_kernel_op_44);
+                const real_t tmp_kernel_op_46 = -p_affine_0_1;
+                const real_t tmp_kernel_op_47 = tmp_kernel_op_10*0.33333333333333331 + tmp_kernel_op_46 + tmp_kernel_op_8*0.33333333333333331;
+                const real_t tmp_kernel_op_48 = (tmp_kernel_op_47*tmp_kernel_op_47);
+                const real_t tmp_kernel_op_49 = tmp_kernel_op_45 + tmp_kernel_op_48;
+                const real_t tmp_kernel_op_52 = pow(tmp_kernel_op_49, -0.50000000000000000)*tmp_kernel_op_51;
+                const real_t tmp_kernel_op_53 = tmp_kernel_op_44*tmp_kernel_op_52;
+                const real_t tmp_kernel_op_54 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_44) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_47);
+                const real_t tmp_kernel_op_55 = pow(tmp_kernel_op_49, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_56 = tmp_kernel_op_55*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_54);
+                const real_t tmp_kernel_op_57 = tmp_kernel_op_47*tmp_kernel_op_52;
+                const real_t tmp_kernel_op_58 = tmp_kernel_op_55*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_54);
+                const real_t tmp_kernel_op_59 = tmp_kernel_op_44*tmp_kernel_op_47;
+                const real_t tmp_kernel_op_60 = abs_det_jac_affine_GRAY*-0.28125*abs((tmp_kernel_op_0*tmp_kernel_op_53 - tmp_kernel_op_48*tmp_kernel_op_56)*(tmp_kernel_op_16*tmp_kernel_op_57 + tmp_kernel_op_45*tmp_kernel_op_58) - (tmp_kernel_op_0*tmp_kernel_op_57 + tmp_kernel_op_56*tmp_kernel_op_59)*(tmp_kernel_op_16*tmp_kernel_op_53 - tmp_kernel_op_58*tmp_kernel_op_59));
+                const real_t tmp_kernel_op_61 = p_affine_0_0 + tmp_kernel_op_3*0.20000000000000001 + tmp_kernel_op_5*0.59999999999999998;
+                const real_t tmp_kernel_op_62 = (tmp_kernel_op_61*tmp_kernel_op_61);
+                const real_t tmp_kernel_op_63 = p_affine_0_1 + tmp_kernel_op_11*0.59999999999999998 + tmp_kernel_op_9*0.20000000000000001;
+                const real_t tmp_kernel_op_64 = (tmp_kernel_op_63*tmp_kernel_op_63);
+                const real_t tmp_kernel_op_65 = tmp_kernel_op_62 + tmp_kernel_op_64;
+                const real_t tmp_kernel_op_66 = tmp_kernel_op_21*pow(tmp_kernel_op_65, -0.50000000000000000);
+                const real_t tmp_kernel_op_67 = tmp_kernel_op_61*tmp_kernel_op_66;
+                const real_t tmp_kernel_op_68 = pow(tmp_kernel_op_65, -1.5000000000000000);
+                const real_t tmp_kernel_op_69 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_61) - tmp_kernel_op_17*(tmp_kernel_op_25 + tmp_kernel_op_63));
+                const real_t tmp_kernel_op_70 = tmp_kernel_op_68*tmp_kernel_op_69*1.0;
+                const real_t tmp_kernel_op_71 = tmp_kernel_op_1*tmp_kernel_op_67 + tmp_kernel_op_64*tmp_kernel_op_70;
+                const real_t tmp_kernel_op_72 = tmp_kernel_op_63*tmp_kernel_op_66;
+                const real_t tmp_kernel_op_73 = -tmp_kernel_op_17*tmp_kernel_op_72 + tmp_kernel_op_62*tmp_kernel_op_68*tmp_kernel_op_69*1.0;
+                const real_t tmp_kernel_op_74 = tmp_kernel_op_17*tmp_kernel_op_67 + tmp_kernel_op_61*tmp_kernel_op_63*tmp_kernel_op_68*tmp_kernel_op_69*1.0;
+                const real_t tmp_kernel_op_75 = tmp_kernel_op_1*tmp_kernel_op_72 - tmp_kernel_op_61*tmp_kernel_op_63*tmp_kernel_op_70;
+                const real_t tmp_kernel_op_76 = 1.0 / (tmp_kernel_op_71*tmp_kernel_op_73 + tmp_kernel_op_74*tmp_kernel_op_75);
+                const real_t tmp_kernel_op_80 = tmp_kernel_op_76*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_79 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_79);
+                const real_t tmp_kernel_op_81 = tmp_kernel_op_76*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_79 + jac_affine_inv_1_0_GRAY*tmp_kernel_op_79);
+                const real_t tmp_kernel_op_82 = tmp_kernel_op_71*tmp_kernel_op_80 + tmp_kernel_op_74*tmp_kernel_op_81;
+                const real_t tmp_kernel_op_83 = -tmp_kernel_op_75;
+                const real_t tmp_kernel_op_84 = tmp_kernel_op_73*tmp_kernel_op_81 + tmp_kernel_op_80*tmp_kernel_op_83;
+                const real_t tmp_kernel_op_85 = tmp_kernel_op_2*0.20000000000000001 + tmp_kernel_op_4*0.59999999999999998 + tmp_kernel_op_43;
+                const real_t tmp_kernel_op_86 = (tmp_kernel_op_85*tmp_kernel_op_85);
+                const real_t tmp_kernel_op_87 = tmp_kernel_op_10*0.59999999999999998 + tmp_kernel_op_46 + tmp_kernel_op_8*0.20000000000000001;
+                const real_t tmp_kernel_op_88 = (tmp_kernel_op_87*tmp_kernel_op_87);
+                const real_t tmp_kernel_op_89 = tmp_kernel_op_86 + tmp_kernel_op_88;
+                const real_t tmp_kernel_op_90 = tmp_kernel_op_51*pow(tmp_kernel_op_89, -0.50000000000000000);
+                const real_t tmp_kernel_op_91 = tmp_kernel_op_85*tmp_kernel_op_90;
+                const real_t tmp_kernel_op_92 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_85) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_87);
+                const real_t tmp_kernel_op_93 = pow(tmp_kernel_op_89, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_94 = tmp_kernel_op_93*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_92);
+                const real_t tmp_kernel_op_95 = tmp_kernel_op_87*tmp_kernel_op_90;
+                const real_t tmp_kernel_op_96 = tmp_kernel_op_93*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_92);
+                const real_t tmp_kernel_op_97 = tmp_kernel_op_85*tmp_kernel_op_87;
+                const real_t tmp_kernel_op_98 = abs_det_jac_affine_GRAY*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_91 - tmp_kernel_op_88*tmp_kernel_op_94)*(tmp_kernel_op_16*tmp_kernel_op_95 + tmp_kernel_op_86*tmp_kernel_op_96) - (tmp_kernel_op_0*tmp_kernel_op_95 + tmp_kernel_op_94*tmp_kernel_op_97)*(tmp_kernel_op_16*tmp_kernel_op_91 - tmp_kernel_op_96*tmp_kernel_op_97));
+                const real_t tmp_kernel_op_99 = p_affine_0_0 + tmp_kernel_op_3*0.59999999999999998 + tmp_kernel_op_5*0.20000000000000001;
+                const real_t tmp_kernel_op_100 = (tmp_kernel_op_99*tmp_kernel_op_99);
+                const real_t tmp_kernel_op_101 = p_affine_0_1 + tmp_kernel_op_11*0.20000000000000001 + tmp_kernel_op_9*0.59999999999999998;
+                const real_t tmp_kernel_op_102 = (tmp_kernel_op_101*tmp_kernel_op_101);
+                const real_t tmp_kernel_op_103 = tmp_kernel_op_100 + tmp_kernel_op_102;
+                const real_t tmp_kernel_op_104 = pow(tmp_kernel_op_103, -0.50000000000000000)*tmp_kernel_op_21;
+                const real_t tmp_kernel_op_105 = tmp_kernel_op_104*tmp_kernel_op_99;
+                const real_t tmp_kernel_op_106 = pow(tmp_kernel_op_103, -1.5000000000000000);
+                const real_t tmp_kernel_op_107 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_99) - tmp_kernel_op_17*(tmp_kernel_op_101 + tmp_kernel_op_25));
+                const real_t tmp_kernel_op_108 = tmp_kernel_op_106*tmp_kernel_op_107*1.0;
+                const real_t tmp_kernel_op_109 = tmp_kernel_op_1*tmp_kernel_op_105 + tmp_kernel_op_102*tmp_kernel_op_108;
+                const real_t tmp_kernel_op_110 = tmp_kernel_op_101*tmp_kernel_op_104;
+                const real_t tmp_kernel_op_111 = tmp_kernel_op_100*tmp_kernel_op_106*tmp_kernel_op_107*1.0 - tmp_kernel_op_110*tmp_kernel_op_17;
+                const real_t tmp_kernel_op_112 = tmp_kernel_op_101*tmp_kernel_op_106*tmp_kernel_op_107*tmp_kernel_op_99*1.0 + tmp_kernel_op_105*tmp_kernel_op_17;
+                const real_t tmp_kernel_op_113 = tmp_kernel_op_1*tmp_kernel_op_110 - tmp_kernel_op_101*tmp_kernel_op_108*tmp_kernel_op_99;
+                const real_t tmp_kernel_op_114 = 1.0 / (tmp_kernel_op_109*tmp_kernel_op_111 + tmp_kernel_op_112*tmp_kernel_op_113);
+                const real_t tmp_kernel_op_118 = tmp_kernel_op_114*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_117 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_117);
+                const real_t tmp_kernel_op_119 = tmp_kernel_op_114*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_117 + jac_affine_inv_1_0_GRAY*tmp_kernel_op_117);
+                const real_t tmp_kernel_op_120 = tmp_kernel_op_109*tmp_kernel_op_118 + tmp_kernel_op_112*tmp_kernel_op_119;
+                const real_t tmp_kernel_op_121 = -tmp_kernel_op_113;
+                const real_t tmp_kernel_op_122 = tmp_kernel_op_111*tmp_kernel_op_119 + tmp_kernel_op_118*tmp_kernel_op_121;
+                const real_t tmp_kernel_op_123 = tmp_kernel_op_2*0.59999999999999998 + tmp_kernel_op_4*0.20000000000000001 + tmp_kernel_op_43;
+                const real_t tmp_kernel_op_124 = (tmp_kernel_op_123*tmp_kernel_op_123);
+                const real_t tmp_kernel_op_125 = tmp_kernel_op_10*0.20000000000000001 + tmp_kernel_op_46 + tmp_kernel_op_8*0.59999999999999998;
+                const real_t tmp_kernel_op_126 = (tmp_kernel_op_125*tmp_kernel_op_125);
+                const real_t tmp_kernel_op_127 = tmp_kernel_op_124 + tmp_kernel_op_126;
+                const real_t tmp_kernel_op_128 = pow(tmp_kernel_op_127, -0.50000000000000000)*tmp_kernel_op_51;
+                const real_t tmp_kernel_op_129 = tmp_kernel_op_123*tmp_kernel_op_128;
+                const real_t tmp_kernel_op_130 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_123) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_125);
+                const real_t tmp_kernel_op_131 = pow(tmp_kernel_op_127, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_132 = tmp_kernel_op_131*(radRayVertex + tmp_kernel_op_130*tmp_kernel_op_50);
+                const real_t tmp_kernel_op_133 = tmp_kernel_op_125*tmp_kernel_op_128;
+                const real_t tmp_kernel_op_134 = tmp_kernel_op_131*(radRayVertex + tmp_kernel_op_130*tmp_kernel_op_50);
+                const real_t tmp_kernel_op_135 = tmp_kernel_op_123*tmp_kernel_op_125;
+                const real_t tmp_kernel_op_136 = abs_det_jac_affine_GRAY*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_129 - tmp_kernel_op_126*tmp_kernel_op_132)*(tmp_kernel_op_124*tmp_kernel_op_134 + tmp_kernel_op_133*tmp_kernel_op_16) - (tmp_kernel_op_0*tmp_kernel_op_133 + tmp_kernel_op_132*tmp_kernel_op_135)*(tmp_kernel_op_129*tmp_kernel_op_16 - tmp_kernel_op_134*tmp_kernel_op_135));
+                const real_t tmp_kernel_op_137 = p_affine_0_0 + tmp_kernel_op_3*0.20000000000000001 + tmp_kernel_op_5*0.20000000000000001;
+                const real_t tmp_kernel_op_138 = (tmp_kernel_op_137*tmp_kernel_op_137);
+                const real_t tmp_kernel_op_139 = p_affine_0_1 + tmp_kernel_op_11*0.20000000000000001 + tmp_kernel_op_9*0.20000000000000001;
+                const real_t tmp_kernel_op_140 = (tmp_kernel_op_139*tmp_kernel_op_139);
+                const real_t tmp_kernel_op_141 = tmp_kernel_op_138 + tmp_kernel_op_140;
+                const real_t tmp_kernel_op_142 = pow(tmp_kernel_op_141, -0.50000000000000000)*tmp_kernel_op_21;
+                const real_t tmp_kernel_op_143 = tmp_kernel_op_137*tmp_kernel_op_142;
+                const real_t tmp_kernel_op_144 = pow(tmp_kernel_op_141, -1.5000000000000000);
+                const real_t tmp_kernel_op_145 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_137 + tmp_kernel_op_26) - tmp_kernel_op_17*(tmp_kernel_op_139 + tmp_kernel_op_25));
+                const real_t tmp_kernel_op_146 = tmp_kernel_op_144*tmp_kernel_op_145*1.0;
+                const real_t tmp_kernel_op_147 = tmp_kernel_op_1*tmp_kernel_op_143 + tmp_kernel_op_140*tmp_kernel_op_146;
+                const real_t tmp_kernel_op_148 = tmp_kernel_op_139*tmp_kernel_op_142;
+                const real_t tmp_kernel_op_149 = tmp_kernel_op_138*tmp_kernel_op_144*tmp_kernel_op_145*1.0 - tmp_kernel_op_148*tmp_kernel_op_17;
+                const real_t tmp_kernel_op_150 = tmp_kernel_op_137*tmp_kernel_op_139*tmp_kernel_op_144*tmp_kernel_op_145*1.0 + tmp_kernel_op_143*tmp_kernel_op_17;
+                const real_t tmp_kernel_op_151 = tmp_kernel_op_1*tmp_kernel_op_148 - tmp_kernel_op_137*tmp_kernel_op_139*tmp_kernel_op_146;
+                const real_t tmp_kernel_op_152 = 1.0 / (tmp_kernel_op_147*tmp_kernel_op_149 + tmp_kernel_op_150*tmp_kernel_op_151);
+                const real_t tmp_kernel_op_156 = tmp_kernel_op_152*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_155 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_155);
+                const real_t tmp_kernel_op_157 = tmp_kernel_op_152*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_155 + jac_affine_inv_1_0_GRAY*tmp_kernel_op_155);
+                const real_t tmp_kernel_op_158 = tmp_kernel_op_147*tmp_kernel_op_156 + tmp_kernel_op_150*tmp_kernel_op_157;
+                const real_t tmp_kernel_op_159 = -tmp_kernel_op_151;
+                const real_t tmp_kernel_op_160 = tmp_kernel_op_149*tmp_kernel_op_157 + tmp_kernel_op_156*tmp_kernel_op_159;
+                const real_t tmp_kernel_op_161 = tmp_kernel_op_2*0.20000000000000001 + tmp_kernel_op_4*0.20000000000000001 + tmp_kernel_op_43;
+                const real_t tmp_kernel_op_162 = (tmp_kernel_op_161*tmp_kernel_op_161);
+                const real_t tmp_kernel_op_163 = tmp_kernel_op_10*0.20000000000000001 + tmp_kernel_op_46 + tmp_kernel_op_8*0.20000000000000001;
+                const real_t tmp_kernel_op_164 = (tmp_kernel_op_163*tmp_kernel_op_163);
+                const real_t tmp_kernel_op_165 = tmp_kernel_op_162 + tmp_kernel_op_164;
+                const real_t tmp_kernel_op_166 = pow(tmp_kernel_op_165, -0.50000000000000000)*tmp_kernel_op_51;
+                const real_t tmp_kernel_op_167 = tmp_kernel_op_161*tmp_kernel_op_166;
+                const real_t tmp_kernel_op_168 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_161) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_163);
+                const real_t tmp_kernel_op_169 = pow(tmp_kernel_op_165, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_170 = tmp_kernel_op_169*(radRayVertex + tmp_kernel_op_168*tmp_kernel_op_50);
+                const real_t tmp_kernel_op_171 = tmp_kernel_op_163*tmp_kernel_op_166;
+                const real_t tmp_kernel_op_172 = tmp_kernel_op_169*(radRayVertex + tmp_kernel_op_168*tmp_kernel_op_50);
+                const real_t tmp_kernel_op_173 = tmp_kernel_op_161*tmp_kernel_op_163;
+                const real_t tmp_kernel_op_174 = abs_det_jac_affine_GRAY*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_167 - tmp_kernel_op_164*tmp_kernel_op_170)*(tmp_kernel_op_16*tmp_kernel_op_171 + tmp_kernel_op_162*tmp_kernel_op_172) - (tmp_kernel_op_0*tmp_kernel_op_171 + tmp_kernel_op_170*tmp_kernel_op_173)*(tmp_kernel_op_16*tmp_kernel_op_167 - tmp_kernel_op_172*tmp_kernel_op_173));
+                const real_t tmp_kernel_op_175 = tmp_kernel_op_34*(tmp_kernel_op_35 - 1.0);
+                const real_t tmp_kernel_op_176 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_175;
+                const real_t tmp_kernel_op_177 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_175;
+                const real_t tmp_kernel_op_178 = tmp_kernel_op_176*tmp_kernel_op_29 + tmp_kernel_op_177*tmp_kernel_op_32;
+                const real_t tmp_kernel_op_179 = tmp_kernel_op_176*tmp_kernel_op_41 + tmp_kernel_op_177*tmp_kernel_op_31;
+                const real_t tmp_kernel_op_180 = tmp_kernel_op_76*(tmp_kernel_op_77 - 1.0);
+                const real_t tmp_kernel_op_181 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_180;
+                const real_t tmp_kernel_op_182 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_180;
+                const real_t tmp_kernel_op_183 = tmp_kernel_op_181*tmp_kernel_op_71 + tmp_kernel_op_182*tmp_kernel_op_74;
+                const real_t tmp_kernel_op_184 = tmp_kernel_op_181*tmp_kernel_op_83 + tmp_kernel_op_182*tmp_kernel_op_73;
+                const real_t tmp_kernel_op_185 = tmp_kernel_op_114*(tmp_kernel_op_115 - 1.0);
+                const real_t tmp_kernel_op_186 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_185;
+                const real_t tmp_kernel_op_187 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_185;
+                const real_t tmp_kernel_op_188 = tmp_kernel_op_109*tmp_kernel_op_186 + tmp_kernel_op_112*tmp_kernel_op_187;
+                const real_t tmp_kernel_op_189 = tmp_kernel_op_111*tmp_kernel_op_187 + tmp_kernel_op_121*tmp_kernel_op_186;
+                const real_t tmp_kernel_op_190 = tmp_kernel_op_152*(tmp_kernel_op_153 - 1.0);
+                const real_t tmp_kernel_op_191 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_190;
+                const real_t tmp_kernel_op_192 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_190;
+                const real_t tmp_kernel_op_193 = tmp_kernel_op_147*tmp_kernel_op_191 + tmp_kernel_op_150*tmp_kernel_op_192;
+                const real_t tmp_kernel_op_194 = tmp_kernel_op_149*tmp_kernel_op_192 + tmp_kernel_op_159*tmp_kernel_op_191;
+                const real_t tmp_kernel_op_195 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_188 + tmp_kernel_op_122*tmp_kernel_op_189) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_193 + tmp_kernel_op_160*tmp_kernel_op_194) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_40 + tmp_kernel_op_179*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_82 + tmp_kernel_op_184*tmp_kernel_op_84);
+                const real_t tmp_kernel_op_196 = tmp_kernel_op_34*(tmp_kernel_op_36 - 1.0);
+                const real_t tmp_kernel_op_197 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_196;
+                const real_t tmp_kernel_op_198 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_196;
+                const real_t tmp_kernel_op_199 = tmp_kernel_op_197*tmp_kernel_op_29 + tmp_kernel_op_198*tmp_kernel_op_32;
+                const real_t tmp_kernel_op_200 = tmp_kernel_op_197*tmp_kernel_op_41 + tmp_kernel_op_198*tmp_kernel_op_31;
+                const real_t tmp_kernel_op_201 = tmp_kernel_op_76*(tmp_kernel_op_78 - 1.0);
+                const real_t tmp_kernel_op_202 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_201;
+                const real_t tmp_kernel_op_203 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_201;
+                const real_t tmp_kernel_op_204 = tmp_kernel_op_202*tmp_kernel_op_71 + tmp_kernel_op_203*tmp_kernel_op_74;
+                const real_t tmp_kernel_op_205 = tmp_kernel_op_202*tmp_kernel_op_83 + tmp_kernel_op_203*tmp_kernel_op_73;
+                const real_t tmp_kernel_op_206 = tmp_kernel_op_114*(tmp_kernel_op_116 - 1.0);
+                const real_t tmp_kernel_op_207 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_206;
+                const real_t tmp_kernel_op_208 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_206;
+                const real_t tmp_kernel_op_209 = tmp_kernel_op_109*tmp_kernel_op_207 + tmp_kernel_op_112*tmp_kernel_op_208;
+                const real_t tmp_kernel_op_210 = tmp_kernel_op_111*tmp_kernel_op_208 + tmp_kernel_op_121*tmp_kernel_op_207;
+                const real_t tmp_kernel_op_211 = tmp_kernel_op_152*(tmp_kernel_op_154 - 1.0);
+                const real_t tmp_kernel_op_212 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_211;
+                const real_t tmp_kernel_op_213 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_211;
+                const real_t tmp_kernel_op_214 = tmp_kernel_op_147*tmp_kernel_op_212 + tmp_kernel_op_150*tmp_kernel_op_213;
+                const real_t tmp_kernel_op_215 = tmp_kernel_op_149*tmp_kernel_op_213 + tmp_kernel_op_159*tmp_kernel_op_212;
+                const real_t tmp_kernel_op_216 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_209 + tmp_kernel_op_122*tmp_kernel_op_210) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_214 + tmp_kernel_op_160*tmp_kernel_op_215) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_40 + tmp_kernel_op_200*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_82 + tmp_kernel_op_205*tmp_kernel_op_84);
+                const real_t tmp_kernel_op_219 = tmp_kernel_op_34*(tmp_kernel_op_217 + tmp_kernel_op_218);
+                const real_t tmp_kernel_op_222 = tmp_kernel_op_34*(tmp_kernel_op_220 + tmp_kernel_op_221);
+                const real_t tmp_kernel_op_223 = tmp_kernel_op_219*tmp_kernel_op_29 + tmp_kernel_op_222*tmp_kernel_op_32;
+                const real_t tmp_kernel_op_224 = tmp_kernel_op_219*tmp_kernel_op_41 + tmp_kernel_op_222*tmp_kernel_op_31;
+                const real_t tmp_kernel_op_227 = tmp_kernel_op_76*(tmp_kernel_op_225 + tmp_kernel_op_226);
+                const real_t tmp_kernel_op_230 = tmp_kernel_op_76*(tmp_kernel_op_228 + tmp_kernel_op_229);
+                const real_t tmp_kernel_op_231 = tmp_kernel_op_227*tmp_kernel_op_71 + tmp_kernel_op_230*tmp_kernel_op_74;
+                const real_t tmp_kernel_op_232 = tmp_kernel_op_227*tmp_kernel_op_83 + tmp_kernel_op_230*tmp_kernel_op_73;
+                const real_t tmp_kernel_op_235 = tmp_kernel_op_114*(tmp_kernel_op_233 + tmp_kernel_op_234);
+                const real_t tmp_kernel_op_238 = tmp_kernel_op_114*(tmp_kernel_op_236 + tmp_kernel_op_237);
+                const real_t tmp_kernel_op_239 = tmp_kernel_op_109*tmp_kernel_op_235 + tmp_kernel_op_112*tmp_kernel_op_238;
+                const real_t tmp_kernel_op_240 = tmp_kernel_op_111*tmp_kernel_op_238 + tmp_kernel_op_121*tmp_kernel_op_235;
+                const real_t tmp_kernel_op_243 = tmp_kernel_op_152*(tmp_kernel_op_241 + tmp_kernel_op_242);
+                const real_t tmp_kernel_op_246 = tmp_kernel_op_152*(tmp_kernel_op_244 + tmp_kernel_op_245);
+                const real_t tmp_kernel_op_247 = tmp_kernel_op_147*tmp_kernel_op_243 + tmp_kernel_op_150*tmp_kernel_op_246;
+                const real_t tmp_kernel_op_248 = tmp_kernel_op_149*tmp_kernel_op_246 + tmp_kernel_op_159*tmp_kernel_op_243;
+                const real_t tmp_kernel_op_249 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_239 + tmp_kernel_op_122*tmp_kernel_op_240) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_247 + tmp_kernel_op_160*tmp_kernel_op_248) + tmp_kernel_op_60*(tmp_kernel_op_223*tmp_kernel_op_40 + tmp_kernel_op_224*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_231*tmp_kernel_op_82 + tmp_kernel_op_232*tmp_kernel_op_84);
+                const real_t tmp_kernel_op_251 = tmp_kernel_op_34*(jac_affine_inv_1_1_GRAY*tmp_kernel_op_250 - tmp_kernel_op_217);
+                const real_t tmp_kernel_op_252 = tmp_kernel_op_34*(jac_affine_inv_1_0_GRAY*tmp_kernel_op_250 - tmp_kernel_op_220);
+                const real_t tmp_kernel_op_253 = tmp_kernel_op_251*tmp_kernel_op_29 + tmp_kernel_op_252*tmp_kernel_op_32;
+                const real_t tmp_kernel_op_254 = tmp_kernel_op_251*tmp_kernel_op_41 + tmp_kernel_op_252*tmp_kernel_op_31;
+                const real_t tmp_kernel_op_256 = tmp_kernel_op_76*(jac_affine_inv_1_1_GRAY*tmp_kernel_op_255 - tmp_kernel_op_225);
+                const real_t tmp_kernel_op_257 = tmp_kernel_op_76*(jac_affine_inv_1_0_GRAY*tmp_kernel_op_255 - tmp_kernel_op_228);
+                const real_t tmp_kernel_op_258 = tmp_kernel_op_256*tmp_kernel_op_71 + tmp_kernel_op_257*tmp_kernel_op_74;
+                const real_t tmp_kernel_op_259 = tmp_kernel_op_256*tmp_kernel_op_83 + tmp_kernel_op_257*tmp_kernel_op_73;
+                const real_t tmp_kernel_op_261 = tmp_kernel_op_114*(jac_affine_inv_1_1_GRAY*tmp_kernel_op_260 - tmp_kernel_op_233);
+                const real_t tmp_kernel_op_262 = tmp_kernel_op_114*(jac_affine_inv_1_0_GRAY*tmp_kernel_op_260 - tmp_kernel_op_236);
+                const real_t tmp_kernel_op_263 = tmp_kernel_op_109*tmp_kernel_op_261 + tmp_kernel_op_112*tmp_kernel_op_262;
+                const real_t tmp_kernel_op_264 = tmp_kernel_op_111*tmp_kernel_op_262 + tmp_kernel_op_121*tmp_kernel_op_261;
+                const real_t tmp_kernel_op_266 = tmp_kernel_op_152*(jac_affine_inv_1_1_GRAY*tmp_kernel_op_265 - tmp_kernel_op_241);
+                const real_t tmp_kernel_op_267 = tmp_kernel_op_152*(jac_affine_inv_1_0_GRAY*tmp_kernel_op_265 - tmp_kernel_op_244);
+                const real_t tmp_kernel_op_268 = tmp_kernel_op_147*tmp_kernel_op_266 + tmp_kernel_op_150*tmp_kernel_op_267;
+                const real_t tmp_kernel_op_269 = tmp_kernel_op_149*tmp_kernel_op_267 + tmp_kernel_op_159*tmp_kernel_op_266;
+                const real_t tmp_kernel_op_270 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_263 + tmp_kernel_op_122*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_268 + tmp_kernel_op_160*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_253*tmp_kernel_op_40 + tmp_kernel_op_254*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_258*tmp_kernel_op_82 + tmp_kernel_op_259*tmp_kernel_op_84);
+                const real_t tmp_kernel_op_272 = tmp_kernel_op_34*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_271 - tmp_kernel_op_218);
+                const real_t tmp_kernel_op_273 = tmp_kernel_op_34*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_271 - tmp_kernel_op_221);
+                const real_t tmp_kernel_op_274 = tmp_kernel_op_272*tmp_kernel_op_29 + tmp_kernel_op_273*tmp_kernel_op_32;
+                const real_t tmp_kernel_op_275 = tmp_kernel_op_272*tmp_kernel_op_41 + tmp_kernel_op_273*tmp_kernel_op_31;
+                const real_t tmp_kernel_op_277 = tmp_kernel_op_76*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_276 - tmp_kernel_op_226);
+                const real_t tmp_kernel_op_278 = tmp_kernel_op_76*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_276 - tmp_kernel_op_229);
+                const real_t tmp_kernel_op_279 = tmp_kernel_op_277*tmp_kernel_op_71 + tmp_kernel_op_278*tmp_kernel_op_74;
+                const real_t tmp_kernel_op_280 = tmp_kernel_op_277*tmp_kernel_op_83 + tmp_kernel_op_278*tmp_kernel_op_73;
+                const real_t tmp_kernel_op_282 = tmp_kernel_op_114*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_281 - tmp_kernel_op_234);
+                const real_t tmp_kernel_op_283 = tmp_kernel_op_114*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_281 - tmp_kernel_op_237);
+                const real_t tmp_kernel_op_284 = tmp_kernel_op_109*tmp_kernel_op_282 + tmp_kernel_op_112*tmp_kernel_op_283;
+                const real_t tmp_kernel_op_285 = tmp_kernel_op_111*tmp_kernel_op_283 + tmp_kernel_op_121*tmp_kernel_op_282;
+                const real_t tmp_kernel_op_287 = tmp_kernel_op_152*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_286 - tmp_kernel_op_242);
+                const real_t tmp_kernel_op_288 = tmp_kernel_op_152*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_286 - tmp_kernel_op_245);
+                const real_t tmp_kernel_op_289 = tmp_kernel_op_147*tmp_kernel_op_287 + tmp_kernel_op_150*tmp_kernel_op_288;
+                const real_t tmp_kernel_op_290 = tmp_kernel_op_149*tmp_kernel_op_288 + tmp_kernel_op_159*tmp_kernel_op_287;
+                const real_t tmp_kernel_op_291 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_284 + tmp_kernel_op_122*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_289 + tmp_kernel_op_160*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_274*tmp_kernel_op_40 + tmp_kernel_op_275*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_279*tmp_kernel_op_82 + tmp_kernel_op_280*tmp_kernel_op_84);
+                const real_t tmp_kernel_op_292 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_209 + tmp_kernel_op_189*tmp_kernel_op_210) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_214 + tmp_kernel_op_194*tmp_kernel_op_215) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_199 + tmp_kernel_op_179*tmp_kernel_op_200) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_204 + tmp_kernel_op_184*tmp_kernel_op_205);
+                const real_t tmp_kernel_op_293 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_239 + tmp_kernel_op_189*tmp_kernel_op_240) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_247 + tmp_kernel_op_194*tmp_kernel_op_248) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_223 + tmp_kernel_op_179*tmp_kernel_op_224) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_231 + tmp_kernel_op_184*tmp_kernel_op_232);
+                const real_t tmp_kernel_op_294 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_263 + tmp_kernel_op_189*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_268 + tmp_kernel_op_194*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_253 + tmp_kernel_op_179*tmp_kernel_op_254) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_258 + tmp_kernel_op_184*tmp_kernel_op_259);
+                const real_t tmp_kernel_op_295 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_284 + tmp_kernel_op_189*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_289 + tmp_kernel_op_194*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_274 + tmp_kernel_op_179*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_279 + tmp_kernel_op_184*tmp_kernel_op_280);
+                const real_t tmp_kernel_op_296 = tmp_kernel_op_136*(tmp_kernel_op_209*tmp_kernel_op_239 + tmp_kernel_op_210*tmp_kernel_op_240) + tmp_kernel_op_174*(tmp_kernel_op_214*tmp_kernel_op_247 + tmp_kernel_op_215*tmp_kernel_op_248) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_223 + tmp_kernel_op_200*tmp_kernel_op_224) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_231 + tmp_kernel_op_205*tmp_kernel_op_232);
+                const real_t tmp_kernel_op_297 = tmp_kernel_op_136*(tmp_kernel_op_209*tmp_kernel_op_263 + tmp_kernel_op_210*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_214*tmp_kernel_op_268 + tmp_kernel_op_215*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_253 + tmp_kernel_op_200*tmp_kernel_op_254) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_258 + tmp_kernel_op_205*tmp_kernel_op_259);
+                const real_t tmp_kernel_op_298 = tmp_kernel_op_136*(tmp_kernel_op_209*tmp_kernel_op_284 + tmp_kernel_op_210*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_214*tmp_kernel_op_289 + tmp_kernel_op_215*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_274 + tmp_kernel_op_200*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_279 + tmp_kernel_op_205*tmp_kernel_op_280);
+                const real_t tmp_kernel_op_299 = tmp_kernel_op_136*(tmp_kernel_op_239*tmp_kernel_op_263 + tmp_kernel_op_240*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_247*tmp_kernel_op_268 + tmp_kernel_op_248*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_223*tmp_kernel_op_253 + tmp_kernel_op_224*tmp_kernel_op_254) + tmp_kernel_op_98*(tmp_kernel_op_231*tmp_kernel_op_258 + tmp_kernel_op_232*tmp_kernel_op_259);
+                const real_t tmp_kernel_op_300 = tmp_kernel_op_136*(tmp_kernel_op_239*tmp_kernel_op_284 + tmp_kernel_op_240*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_247*tmp_kernel_op_289 + tmp_kernel_op_248*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_223*tmp_kernel_op_274 + tmp_kernel_op_224*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_231*tmp_kernel_op_279 + tmp_kernel_op_232*tmp_kernel_op_280);
+                const real_t tmp_kernel_op_301 = tmp_kernel_op_136*(tmp_kernel_op_263*tmp_kernel_op_284 + tmp_kernel_op_264*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_268*tmp_kernel_op_289 + tmp_kernel_op_269*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_253*tmp_kernel_op_274 + tmp_kernel_op_254*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_258*tmp_kernel_op_279 + tmp_kernel_op_259*tmp_kernel_op_280);
+                const real_t elMatVec_0 = src_dof_0*(tmp_kernel_op_136*((tmp_kernel_op_120*tmp_kernel_op_120) + (tmp_kernel_op_122*tmp_kernel_op_122)) + tmp_kernel_op_174*((tmp_kernel_op_158*tmp_kernel_op_158) + (tmp_kernel_op_160*tmp_kernel_op_160)) + tmp_kernel_op_60*((tmp_kernel_op_40*tmp_kernel_op_40) + (tmp_kernel_op_42*tmp_kernel_op_42)) + tmp_kernel_op_98*((tmp_kernel_op_82*tmp_kernel_op_82) + (tmp_kernel_op_84*tmp_kernel_op_84))) + src_dof_1*tmp_kernel_op_195 + src_dof_2*tmp_kernel_op_216 + src_dof_3*tmp_kernel_op_249 + src_dof_4*tmp_kernel_op_270 + src_dof_5*tmp_kernel_op_291;
+                const real_t elMatVec_1 = src_dof_0*tmp_kernel_op_195 + src_dof_1*(tmp_kernel_op_136*((tmp_kernel_op_188*tmp_kernel_op_188) + (tmp_kernel_op_189*tmp_kernel_op_189)) + tmp_kernel_op_174*((tmp_kernel_op_193*tmp_kernel_op_193) + (tmp_kernel_op_194*tmp_kernel_op_194)) + tmp_kernel_op_60*((tmp_kernel_op_178*tmp_kernel_op_178) + (tmp_kernel_op_179*tmp_kernel_op_179)) + tmp_kernel_op_98*((tmp_kernel_op_183*tmp_kernel_op_183) + (tmp_kernel_op_184*tmp_kernel_op_184))) + src_dof_2*tmp_kernel_op_292 + src_dof_3*tmp_kernel_op_293 + src_dof_4*tmp_kernel_op_294 + src_dof_5*tmp_kernel_op_295;
+                const real_t elMatVec_2 = src_dof_0*tmp_kernel_op_216 + src_dof_1*tmp_kernel_op_292 + src_dof_2*(tmp_kernel_op_136*((tmp_kernel_op_209*tmp_kernel_op_209) + (tmp_kernel_op_210*tmp_kernel_op_210)) + tmp_kernel_op_174*((tmp_kernel_op_214*tmp_kernel_op_214) + (tmp_kernel_op_215*tmp_kernel_op_215)) + tmp_kernel_op_60*((tmp_kernel_op_199*tmp_kernel_op_199) + (tmp_kernel_op_200*tmp_kernel_op_200)) + tmp_kernel_op_98*((tmp_kernel_op_204*tmp_kernel_op_204) + (tmp_kernel_op_205*tmp_kernel_op_205))) + src_dof_3*tmp_kernel_op_296 + src_dof_4*tmp_kernel_op_297 + src_dof_5*tmp_kernel_op_298;
+                const real_t elMatVec_3 = src_dof_0*tmp_kernel_op_249 + src_dof_1*tmp_kernel_op_293 + src_dof_2*tmp_kernel_op_296 + src_dof_3*(tmp_kernel_op_136*((tmp_kernel_op_239*tmp_kernel_op_239) + (tmp_kernel_op_240*tmp_kernel_op_240)) + tmp_kernel_op_174*((tmp_kernel_op_247*tmp_kernel_op_247) + (tmp_kernel_op_248*tmp_kernel_op_248)) + tmp_kernel_op_60*((tmp_kernel_op_223*tmp_kernel_op_223) + (tmp_kernel_op_224*tmp_kernel_op_224)) + tmp_kernel_op_98*((tmp_kernel_op_231*tmp_kernel_op_231) + (tmp_kernel_op_232*tmp_kernel_op_232))) + src_dof_4*tmp_kernel_op_299 + src_dof_5*tmp_kernel_op_300;
+                const real_t elMatVec_4 = src_dof_0*tmp_kernel_op_270 + src_dof_1*tmp_kernel_op_294 + src_dof_2*tmp_kernel_op_297 + src_dof_3*tmp_kernel_op_299 + src_dof_4*(tmp_kernel_op_136*((tmp_kernel_op_263*tmp_kernel_op_263) + (tmp_kernel_op_264*tmp_kernel_op_264)) + tmp_kernel_op_174*((tmp_kernel_op_268*tmp_kernel_op_268) + (tmp_kernel_op_269*tmp_kernel_op_269)) + tmp_kernel_op_60*((tmp_kernel_op_253*tmp_kernel_op_253) + (tmp_kernel_op_254*tmp_kernel_op_254)) + tmp_kernel_op_98*((tmp_kernel_op_258*tmp_kernel_op_258) + (tmp_kernel_op_259*tmp_kernel_op_259))) + src_dof_5*tmp_kernel_op_301;
+                const real_t elMatVec_5 = src_dof_0*tmp_kernel_op_291 + src_dof_1*tmp_kernel_op_295 + src_dof_2*tmp_kernel_op_298 + src_dof_3*tmp_kernel_op_300 + src_dof_4*tmp_kernel_op_301 + src_dof_5*(tmp_kernel_op_136*((tmp_kernel_op_284*tmp_kernel_op_284) + (tmp_kernel_op_285*tmp_kernel_op_285)) + tmp_kernel_op_174*((tmp_kernel_op_289*tmp_kernel_op_289) + (tmp_kernel_op_290*tmp_kernel_op_290)) + tmp_kernel_op_60*((tmp_kernel_op_274*tmp_kernel_op_274) + (tmp_kernel_op_275*tmp_kernel_op_275)) + tmp_kernel_op_98*((tmp_kernel_op_279*tmp_kernel_op_279) + (tmp_kernel_op_280*tmp_kernel_op_280)));
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       const real_t tmp_moved_constant_0 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_36;
+       const real_t tmp_moved_constant_1 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_35;
+       const real_t tmp_moved_constant_2 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_36;
+       const real_t tmp_moved_constant_3 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_35;
+       const real_t tmp_moved_constant_4 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_78;
+       const real_t tmp_moved_constant_5 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_77;
+       const real_t tmp_moved_constant_6 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_78;
+       const real_t tmp_moved_constant_7 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_77;
+       const real_t tmp_moved_constant_8 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_116;
+       const real_t tmp_moved_constant_9 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_115;
+       const real_t tmp_moved_constant_10 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_116;
+       const real_t tmp_moved_constant_11 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_115;
+       const real_t tmp_moved_constant_12 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_154;
+       const real_t tmp_moved_constant_13 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_153;
+       const real_t tmp_moved_constant_14 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_154;
+       const real_t tmp_moved_constant_15 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_153;
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d tmp_kernel_op_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_3 = _mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_4 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_5 = _mm256_mul_pd(tmp_kernel_op_4,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_6 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331)),_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331))),p_affine_0_0);
+                const __m256d tmp_kernel_op_7 = _mm256_mul_pd(tmp_kernel_op_6,tmp_kernel_op_6);
+                const __m256d tmp_kernel_op_8 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_9 = _mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_10 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_11 = _mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_12 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331)),_mm256_mul_pd(tmp_kernel_op_9,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331))),p_affine_0_1);
+                const __m256d tmp_kernel_op_13 = _mm256_mul_pd(tmp_kernel_op_12,tmp_kernel_op_12);
+                const __m256d tmp_kernel_op_14 = _mm256_add_pd(tmp_kernel_op_13,tmp_kernel_op_7);
+                const __m256d tmp_kernel_op_22 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_14)),_mm256_set_pd(tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21));
+                const __m256d tmp_kernel_op_23 = _mm256_mul_pd(tmp_kernel_op_22,tmp_kernel_op_6);
+                const __m256d tmp_kernel_op_24 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_14),_mm256_mul_pd(tmp_kernel_op_14,tmp_kernel_op_14));
+                const __m256d tmp_kernel_op_27 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26),tmp_kernel_op_6),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25),tmp_kernel_op_12),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17))),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_28 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_24,tmp_kernel_op_27),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_29 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_23,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(tmp_kernel_op_13,tmp_kernel_op_28));
+                const __m256d tmp_kernel_op_30 = _mm256_mul_pd(tmp_kernel_op_12,tmp_kernel_op_22);
+                const __m256d tmp_kernel_op_31 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_30,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_24,tmp_kernel_op_27),tmp_kernel_op_7),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_32 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_23,_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_12,tmp_kernel_op_24),tmp_kernel_op_27),tmp_kernel_op_6),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_33 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_30,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_12,tmp_kernel_op_28),tmp_kernel_op_6),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_34 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_29,tmp_kernel_op_31),_mm256_mul_pd(tmp_kernel_op_32,tmp_kernel_op_33)));
+                const __m256d tmp_kernel_op_38 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE),_mm256_set_pd(tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE),_mm256_set_pd(tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37))));
+                const __m256d tmp_kernel_op_39 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE),_mm256_set_pd(tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE),_mm256_set_pd(tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37))));
+                const __m256d tmp_kernel_op_40 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_29,tmp_kernel_op_38),_mm256_mul_pd(tmp_kernel_op_32,tmp_kernel_op_39));
+                const __m256d tmp_kernel_op_41 = _mm256_mul_pd(tmp_kernel_op_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_42 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_39),_mm256_mul_pd(tmp_kernel_op_38,tmp_kernel_op_41));
+                const __m256d tmp_kernel_op_43 = _mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_44 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331)),_mm256_mul_pd(tmp_kernel_op_4,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331))),tmp_kernel_op_43);
+                const __m256d tmp_kernel_op_45 = _mm256_mul_pd(tmp_kernel_op_44,tmp_kernel_op_44);
+                const __m256d tmp_kernel_op_46 = _mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_47 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331))),tmp_kernel_op_46);
+                const __m256d tmp_kernel_op_48 = _mm256_mul_pd(tmp_kernel_op_47,tmp_kernel_op_47);
+                const __m256d tmp_kernel_op_49 = _mm256_add_pd(tmp_kernel_op_45,tmp_kernel_op_48);
+                const __m256d tmp_kernel_op_52 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_49)),_mm256_set_pd(tmp_kernel_op_51,tmp_kernel_op_51,tmp_kernel_op_51,tmp_kernel_op_51));
+                const __m256d tmp_kernel_op_53 = _mm256_mul_pd(tmp_kernel_op_44,tmp_kernel_op_52);
+                const __m256d tmp_kernel_op_54 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_47),_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_44),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_55 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_49),_mm256_mul_pd(tmp_kernel_op_49,tmp_kernel_op_49)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_56 = _mm256_mul_pd(tmp_kernel_op_55,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_54,_mm256_set_pd(tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_57 = _mm256_mul_pd(tmp_kernel_op_47,tmp_kernel_op_52);
+                const __m256d tmp_kernel_op_58 = _mm256_mul_pd(tmp_kernel_op_55,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_54,_mm256_set_pd(tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_59 = _mm256_mul_pd(tmp_kernel_op_44,tmp_kernel_op_47);
+                const __m256d tmp_kernel_op_60 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(-0.28125,-0.28125,-0.28125,-0.28125),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_57,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(tmp_kernel_op_45,tmp_kernel_op_58)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_53,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_48,tmp_kernel_op_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_53,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_58,tmp_kernel_op_59),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_57,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_56,tmp_kernel_op_59))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_61 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),p_affine_0_0);
+                const __m256d tmp_kernel_op_62 = _mm256_mul_pd(tmp_kernel_op_61,tmp_kernel_op_61);
+                const __m256d tmp_kernel_op_63 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_9,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),p_affine_0_1);
+                const __m256d tmp_kernel_op_64 = _mm256_mul_pd(tmp_kernel_op_63,tmp_kernel_op_63);
+                const __m256d tmp_kernel_op_65 = _mm256_add_pd(tmp_kernel_op_62,tmp_kernel_op_64);
+                const __m256d tmp_kernel_op_66 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_65)),_mm256_set_pd(tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21));
+                const __m256d tmp_kernel_op_67 = _mm256_mul_pd(tmp_kernel_op_61,tmp_kernel_op_66);
+                const __m256d tmp_kernel_op_68 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_65),_mm256_mul_pd(tmp_kernel_op_65,tmp_kernel_op_65));
+                const __m256d tmp_kernel_op_69 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26),tmp_kernel_op_61),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25),tmp_kernel_op_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17))),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_70 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_68,tmp_kernel_op_69),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_71 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_67,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(tmp_kernel_op_64,tmp_kernel_op_70));
+                const __m256d tmp_kernel_op_72 = _mm256_mul_pd(tmp_kernel_op_63,tmp_kernel_op_66);
+                const __m256d tmp_kernel_op_73 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_72,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_62,tmp_kernel_op_68),tmp_kernel_op_69),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_74 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_67,_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_61,tmp_kernel_op_63),tmp_kernel_op_68),tmp_kernel_op_69),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_75 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_72,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_61,tmp_kernel_op_63),tmp_kernel_op_70),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_76 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_71,tmp_kernel_op_73),_mm256_mul_pd(tmp_kernel_op_74,tmp_kernel_op_75)));
+                const __m256d tmp_kernel_op_80 = _mm256_mul_pd(tmp_kernel_op_76,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE),_mm256_set_pd(tmp_kernel_op_79,tmp_kernel_op_79,tmp_kernel_op_79,tmp_kernel_op_79)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE),_mm256_set_pd(tmp_kernel_op_79,tmp_kernel_op_79,tmp_kernel_op_79,tmp_kernel_op_79))));
+                const __m256d tmp_kernel_op_81 = _mm256_mul_pd(tmp_kernel_op_76,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE),_mm256_set_pd(tmp_kernel_op_79,tmp_kernel_op_79,tmp_kernel_op_79,tmp_kernel_op_79)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE),_mm256_set_pd(tmp_kernel_op_79,tmp_kernel_op_79,tmp_kernel_op_79,tmp_kernel_op_79))));
+                const __m256d tmp_kernel_op_82 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_71,tmp_kernel_op_80),_mm256_mul_pd(tmp_kernel_op_74,tmp_kernel_op_81));
+                const __m256d tmp_kernel_op_83 = _mm256_mul_pd(tmp_kernel_op_75,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_84 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_73,tmp_kernel_op_81),_mm256_mul_pd(tmp_kernel_op_80,tmp_kernel_op_83));
+                const __m256d tmp_kernel_op_85 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_4,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),tmp_kernel_op_43);
+                const __m256d tmp_kernel_op_86 = _mm256_mul_pd(tmp_kernel_op_85,tmp_kernel_op_85);
+                const __m256d tmp_kernel_op_87 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),tmp_kernel_op_46);
+                const __m256d tmp_kernel_op_88 = _mm256_mul_pd(tmp_kernel_op_87,tmp_kernel_op_87);
+                const __m256d tmp_kernel_op_89 = _mm256_add_pd(tmp_kernel_op_86,tmp_kernel_op_88);
+                const __m256d tmp_kernel_op_90 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_89)),_mm256_set_pd(tmp_kernel_op_51,tmp_kernel_op_51,tmp_kernel_op_51,tmp_kernel_op_51));
+                const __m256d tmp_kernel_op_91 = _mm256_mul_pd(tmp_kernel_op_85,tmp_kernel_op_90);
+                const __m256d tmp_kernel_op_92 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_87),_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_93 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_89),_mm256_mul_pd(tmp_kernel_op_89,tmp_kernel_op_89)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_94 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_92,_mm256_set_pd(tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_95 = _mm256_mul_pd(tmp_kernel_op_87,tmp_kernel_op_90);
+                const __m256d tmp_kernel_op_96 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_92,_mm256_set_pd(tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_97 = _mm256_mul_pd(tmp_kernel_op_85,tmp_kernel_op_87);
+                const __m256d tmp_kernel_op_98 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.26041666666666669,0.26041666666666669,0.26041666666666669,0.26041666666666669),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_95,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(tmp_kernel_op_86,tmp_kernel_op_96)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_91,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_88,tmp_kernel_op_94),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_91,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_96,tmp_kernel_op_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_95,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_94,tmp_kernel_op_97))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_99 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),p_affine_0_0);
+                const __m256d tmp_kernel_op_100 = _mm256_mul_pd(tmp_kernel_op_99,tmp_kernel_op_99);
+                const __m256d tmp_kernel_op_101 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_9,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),p_affine_0_1);
+                const __m256d tmp_kernel_op_102 = _mm256_mul_pd(tmp_kernel_op_101,tmp_kernel_op_101);
+                const __m256d tmp_kernel_op_103 = _mm256_add_pd(tmp_kernel_op_100,tmp_kernel_op_102);
+                const __m256d tmp_kernel_op_104 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_103)),_mm256_set_pd(tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21));
+                const __m256d tmp_kernel_op_105 = _mm256_mul_pd(tmp_kernel_op_104,tmp_kernel_op_99);
+                const __m256d tmp_kernel_op_106 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_103),_mm256_mul_pd(tmp_kernel_op_103,tmp_kernel_op_103));
+                const __m256d tmp_kernel_op_107 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26),tmp_kernel_op_99),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25),tmp_kernel_op_101),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17))),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_108 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_106,tmp_kernel_op_107),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_109 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(tmp_kernel_op_102,tmp_kernel_op_108));
+                const __m256d tmp_kernel_op_110 = _mm256_mul_pd(tmp_kernel_op_101,tmp_kernel_op_104);
+                const __m256d tmp_kernel_op_111 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_106),tmp_kernel_op_107),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_112 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_101,tmp_kernel_op_106),tmp_kernel_op_107),tmp_kernel_op_99),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_113 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_110,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_101,tmp_kernel_op_108),tmp_kernel_op_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_114 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_109,tmp_kernel_op_111),_mm256_mul_pd(tmp_kernel_op_112,tmp_kernel_op_113)));
+                const __m256d tmp_kernel_op_118 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE),_mm256_set_pd(tmp_kernel_op_117,tmp_kernel_op_117,tmp_kernel_op_117,tmp_kernel_op_117)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE),_mm256_set_pd(tmp_kernel_op_117,tmp_kernel_op_117,tmp_kernel_op_117,tmp_kernel_op_117))));
+                const __m256d tmp_kernel_op_119 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE),_mm256_set_pd(tmp_kernel_op_117,tmp_kernel_op_117,tmp_kernel_op_117,tmp_kernel_op_117)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE),_mm256_set_pd(tmp_kernel_op_117,tmp_kernel_op_117,tmp_kernel_op_117,tmp_kernel_op_117))));
+                const __m256d tmp_kernel_op_120 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_109,tmp_kernel_op_118),_mm256_mul_pd(tmp_kernel_op_112,tmp_kernel_op_119));
+                const __m256d tmp_kernel_op_121 = _mm256_mul_pd(tmp_kernel_op_113,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_122 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_111,tmp_kernel_op_119),_mm256_mul_pd(tmp_kernel_op_118,tmp_kernel_op_121));
+                const __m256d tmp_kernel_op_123 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_4,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),tmp_kernel_op_43);
+                const __m256d tmp_kernel_op_124 = _mm256_mul_pd(tmp_kernel_op_123,tmp_kernel_op_123);
+                const __m256d tmp_kernel_op_125 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),tmp_kernel_op_46);
+                const __m256d tmp_kernel_op_126 = _mm256_mul_pd(tmp_kernel_op_125,tmp_kernel_op_125);
+                const __m256d tmp_kernel_op_127 = _mm256_add_pd(tmp_kernel_op_124,tmp_kernel_op_126);
+                const __m256d tmp_kernel_op_128 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_127)),_mm256_set_pd(tmp_kernel_op_51,tmp_kernel_op_51,tmp_kernel_op_51,tmp_kernel_op_51));
+                const __m256d tmp_kernel_op_129 = _mm256_mul_pd(tmp_kernel_op_123,tmp_kernel_op_128);
+                const __m256d tmp_kernel_op_130 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_125),_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_123),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_131 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_127),_mm256_mul_pd(tmp_kernel_op_127,tmp_kernel_op_127)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_132 = _mm256_mul_pd(tmp_kernel_op_131,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_set_pd(tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_133 = _mm256_mul_pd(tmp_kernel_op_125,tmp_kernel_op_128);
+                const __m256d tmp_kernel_op_134 = _mm256_mul_pd(tmp_kernel_op_131,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_set_pd(tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_135 = _mm256_mul_pd(tmp_kernel_op_123,tmp_kernel_op_125);
+                const __m256d tmp_kernel_op_136 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.26041666666666669,0.26041666666666669,0.26041666666666669,0.26041666666666669),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_133,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(tmp_kernel_op_124,tmp_kernel_op_134)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_129,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_126,tmp_kernel_op_132),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_129,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_134,tmp_kernel_op_135),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_133,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_132,tmp_kernel_op_135))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_137 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001))),p_affine_0_0);
+                const __m256d tmp_kernel_op_138 = _mm256_mul_pd(tmp_kernel_op_137,tmp_kernel_op_137);
+                const __m256d tmp_kernel_op_139 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_9,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001))),p_affine_0_1);
+                const __m256d tmp_kernel_op_140 = _mm256_mul_pd(tmp_kernel_op_139,tmp_kernel_op_139);
+                const __m256d tmp_kernel_op_141 = _mm256_add_pd(tmp_kernel_op_138,tmp_kernel_op_140);
+                const __m256d tmp_kernel_op_142 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_141)),_mm256_set_pd(tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21));
+                const __m256d tmp_kernel_op_143 = _mm256_mul_pd(tmp_kernel_op_137,tmp_kernel_op_142);
+                const __m256d tmp_kernel_op_144 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_141),_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_141));
+                const __m256d tmp_kernel_op_145 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26),tmp_kernel_op_137),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25),tmp_kernel_op_139),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17))),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_146 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_145),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_147 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(tmp_kernel_op_140,tmp_kernel_op_146));
+                const __m256d tmp_kernel_op_148 = _mm256_mul_pd(tmp_kernel_op_139,tmp_kernel_op_142);
+                const __m256d tmp_kernel_op_149 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_148,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_138,tmp_kernel_op_144),tmp_kernel_op_145),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_150 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_137,tmp_kernel_op_139),tmp_kernel_op_144),tmp_kernel_op_145),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_151 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_148,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_137,tmp_kernel_op_139),tmp_kernel_op_146),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_152 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_149),_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_151)));
+                const __m256d tmp_kernel_op_156 = _mm256_mul_pd(tmp_kernel_op_152,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE),_mm256_set_pd(tmp_kernel_op_155,tmp_kernel_op_155,tmp_kernel_op_155,tmp_kernel_op_155)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE),_mm256_set_pd(tmp_kernel_op_155,tmp_kernel_op_155,tmp_kernel_op_155,tmp_kernel_op_155))));
+                const __m256d tmp_kernel_op_157 = _mm256_mul_pd(tmp_kernel_op_152,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE),_mm256_set_pd(tmp_kernel_op_155,tmp_kernel_op_155,tmp_kernel_op_155,tmp_kernel_op_155)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE),_mm256_set_pd(tmp_kernel_op_155,tmp_kernel_op_155,tmp_kernel_op_155,tmp_kernel_op_155))));
+                const __m256d tmp_kernel_op_158 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_156),_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_157));
+                const __m256d tmp_kernel_op_159 = _mm256_mul_pd(tmp_kernel_op_151,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_160 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_149,tmp_kernel_op_157),_mm256_mul_pd(tmp_kernel_op_156,tmp_kernel_op_159));
+                const __m256d tmp_kernel_op_161 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_4,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001))),tmp_kernel_op_43);
+                const __m256d tmp_kernel_op_162 = _mm256_mul_pd(tmp_kernel_op_161,tmp_kernel_op_161);
+                const __m256d tmp_kernel_op_163 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001))),tmp_kernel_op_46);
+                const __m256d tmp_kernel_op_164 = _mm256_mul_pd(tmp_kernel_op_163,tmp_kernel_op_163);
+                const __m256d tmp_kernel_op_165 = _mm256_add_pd(tmp_kernel_op_162,tmp_kernel_op_164);
+                const __m256d tmp_kernel_op_166 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_165)),_mm256_set_pd(tmp_kernel_op_51,tmp_kernel_op_51,tmp_kernel_op_51,tmp_kernel_op_51));
+                const __m256d tmp_kernel_op_167 = _mm256_mul_pd(tmp_kernel_op_161,tmp_kernel_op_166);
+                const __m256d tmp_kernel_op_168 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_163),_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_161),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_169 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_165),_mm256_mul_pd(tmp_kernel_op_165,tmp_kernel_op_165)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_170 = _mm256_mul_pd(tmp_kernel_op_169,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_168,_mm256_set_pd(tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_171 = _mm256_mul_pd(tmp_kernel_op_163,tmp_kernel_op_166);
+                const __m256d tmp_kernel_op_172 = _mm256_mul_pd(tmp_kernel_op_169,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_168,_mm256_set_pd(tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50,tmp_kernel_op_50)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_173 = _mm256_mul_pd(tmp_kernel_op_161,tmp_kernel_op_163);
+                const __m256d tmp_kernel_op_174 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.26041666666666669,0.26041666666666669,0.26041666666666669,0.26041666666666669),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_171,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(tmp_kernel_op_162,tmp_kernel_op_172)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_167,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_164,tmp_kernel_op_170),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_167,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_172,tmp_kernel_op_173),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_171,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_170,tmp_kernel_op_173))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_175 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_35,tmp_kernel_op_35,tmp_kernel_op_35,tmp_kernel_op_35)));
+                const __m256d tmp_kernel_op_176 = _mm256_mul_pd(tmp_kernel_op_175,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE));
+                const __m256d tmp_kernel_op_177 = _mm256_mul_pd(tmp_kernel_op_175,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE));
+                const __m256d tmp_kernel_op_178 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_176,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_177,tmp_kernel_op_32));
+                const __m256d tmp_kernel_op_179 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_176,tmp_kernel_op_41),_mm256_mul_pd(tmp_kernel_op_177,tmp_kernel_op_31));
+                const __m256d tmp_kernel_op_180 = _mm256_mul_pd(tmp_kernel_op_76,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_77,tmp_kernel_op_77,tmp_kernel_op_77,tmp_kernel_op_77)));
+                const __m256d tmp_kernel_op_181 = _mm256_mul_pd(tmp_kernel_op_180,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE));
+                const __m256d tmp_kernel_op_182 = _mm256_mul_pd(tmp_kernel_op_180,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE));
+                const __m256d tmp_kernel_op_183 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_181,tmp_kernel_op_71),_mm256_mul_pd(tmp_kernel_op_182,tmp_kernel_op_74));
+                const __m256d tmp_kernel_op_184 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_181,tmp_kernel_op_83),_mm256_mul_pd(tmp_kernel_op_182,tmp_kernel_op_73));
+                const __m256d tmp_kernel_op_185 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_115,tmp_kernel_op_115,tmp_kernel_op_115,tmp_kernel_op_115)));
+                const __m256d tmp_kernel_op_186 = _mm256_mul_pd(tmp_kernel_op_185,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE));
+                const __m256d tmp_kernel_op_187 = _mm256_mul_pd(tmp_kernel_op_185,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE));
+                const __m256d tmp_kernel_op_188 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_109,tmp_kernel_op_186),_mm256_mul_pd(tmp_kernel_op_112,tmp_kernel_op_187));
+                const __m256d tmp_kernel_op_189 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_111,tmp_kernel_op_187),_mm256_mul_pd(tmp_kernel_op_121,tmp_kernel_op_186));
+                const __m256d tmp_kernel_op_190 = _mm256_mul_pd(tmp_kernel_op_152,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_153,tmp_kernel_op_153,tmp_kernel_op_153,tmp_kernel_op_153)));
+                const __m256d tmp_kernel_op_191 = _mm256_mul_pd(tmp_kernel_op_190,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE));
+                const __m256d tmp_kernel_op_192 = _mm256_mul_pd(tmp_kernel_op_190,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE));
+                const __m256d tmp_kernel_op_193 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_191),_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_192));
+                const __m256d tmp_kernel_op_194 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_149,tmp_kernel_op_192),_mm256_mul_pd(tmp_kernel_op_159,tmp_kernel_op_191));
+                const __m256d tmp_kernel_op_195 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_120,tmp_kernel_op_188),_mm256_mul_pd(tmp_kernel_op_122,tmp_kernel_op_189))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_158,tmp_kernel_op_193),_mm256_mul_pd(tmp_kernel_op_160,tmp_kernel_op_194)))),_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_178,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_179,tmp_kernel_op_42)))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_183,tmp_kernel_op_82),_mm256_mul_pd(tmp_kernel_op_184,tmp_kernel_op_84))));
+                const __m256d tmp_kernel_op_196 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_36,tmp_kernel_op_36,tmp_kernel_op_36,tmp_kernel_op_36)));
+                const __m256d tmp_kernel_op_197 = _mm256_mul_pd(tmp_kernel_op_196,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE));
+                const __m256d tmp_kernel_op_198 = _mm256_mul_pd(tmp_kernel_op_196,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE));
+                const __m256d tmp_kernel_op_199 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_197,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_198,tmp_kernel_op_32));
+                const __m256d tmp_kernel_op_200 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_197,tmp_kernel_op_41),_mm256_mul_pd(tmp_kernel_op_198,tmp_kernel_op_31));
+                const __m256d tmp_kernel_op_201 = _mm256_mul_pd(tmp_kernel_op_76,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_78,tmp_kernel_op_78,tmp_kernel_op_78,tmp_kernel_op_78)));
+                const __m256d tmp_kernel_op_202 = _mm256_mul_pd(tmp_kernel_op_201,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE));
+                const __m256d tmp_kernel_op_203 = _mm256_mul_pd(tmp_kernel_op_201,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE));
+                const __m256d tmp_kernel_op_204 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_202,tmp_kernel_op_71),_mm256_mul_pd(tmp_kernel_op_203,tmp_kernel_op_74));
+                const __m256d tmp_kernel_op_205 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_202,tmp_kernel_op_83),_mm256_mul_pd(tmp_kernel_op_203,tmp_kernel_op_73));
+                const __m256d tmp_kernel_op_206 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_116,tmp_kernel_op_116,tmp_kernel_op_116,tmp_kernel_op_116)));
+                const __m256d tmp_kernel_op_207 = _mm256_mul_pd(tmp_kernel_op_206,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE));
+                const __m256d tmp_kernel_op_208 = _mm256_mul_pd(tmp_kernel_op_206,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE));
+                const __m256d tmp_kernel_op_209 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_109,tmp_kernel_op_207),_mm256_mul_pd(tmp_kernel_op_112,tmp_kernel_op_208));
+                const __m256d tmp_kernel_op_210 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_111,tmp_kernel_op_208),_mm256_mul_pd(tmp_kernel_op_121,tmp_kernel_op_207));
+                const __m256d tmp_kernel_op_211 = _mm256_mul_pd(tmp_kernel_op_152,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_154,tmp_kernel_op_154,tmp_kernel_op_154,tmp_kernel_op_154)));
+                const __m256d tmp_kernel_op_212 = _mm256_mul_pd(tmp_kernel_op_211,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE));
+                const __m256d tmp_kernel_op_213 = _mm256_mul_pd(tmp_kernel_op_211,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE));
+                const __m256d tmp_kernel_op_214 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_212),_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_213));
+                const __m256d tmp_kernel_op_215 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_149,tmp_kernel_op_213),_mm256_mul_pd(tmp_kernel_op_159,tmp_kernel_op_212));
+                const __m256d tmp_kernel_op_216 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_120,tmp_kernel_op_209),_mm256_mul_pd(tmp_kernel_op_122,tmp_kernel_op_210))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_158,tmp_kernel_op_214),_mm256_mul_pd(tmp_kernel_op_160,tmp_kernel_op_215)))),_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_199,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_200,tmp_kernel_op_42)))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_204,tmp_kernel_op_82),_mm256_mul_pd(tmp_kernel_op_205,tmp_kernel_op_84))));
+                const __m256d tmp_kernel_op_219 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_set_pd(tmp_moved_constant_0,tmp_moved_constant_0,tmp_moved_constant_0,tmp_moved_constant_0),_mm256_set_pd(tmp_moved_constant_1,tmp_moved_constant_1,tmp_moved_constant_1,tmp_moved_constant_1)));
+                const __m256d tmp_kernel_op_222 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_set_pd(tmp_moved_constant_2,tmp_moved_constant_2,tmp_moved_constant_2,tmp_moved_constant_2),_mm256_set_pd(tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3)));
+                const __m256d tmp_kernel_op_223 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_219,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_222,tmp_kernel_op_32));
+                const __m256d tmp_kernel_op_224 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_219,tmp_kernel_op_41),_mm256_mul_pd(tmp_kernel_op_222,tmp_kernel_op_31));
+                const __m256d tmp_kernel_op_227 = _mm256_mul_pd(tmp_kernel_op_76,_mm256_add_pd(_mm256_set_pd(tmp_moved_constant_4,tmp_moved_constant_4,tmp_moved_constant_4,tmp_moved_constant_4),_mm256_set_pd(tmp_moved_constant_5,tmp_moved_constant_5,tmp_moved_constant_5,tmp_moved_constant_5)));
+                const __m256d tmp_kernel_op_230 = _mm256_mul_pd(tmp_kernel_op_76,_mm256_add_pd(_mm256_set_pd(tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6),_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7)));
+                const __m256d tmp_kernel_op_231 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_227,tmp_kernel_op_71),_mm256_mul_pd(tmp_kernel_op_230,tmp_kernel_op_74));
+                const __m256d tmp_kernel_op_232 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_227,tmp_kernel_op_83),_mm256_mul_pd(tmp_kernel_op_230,tmp_kernel_op_73));
+                const __m256d tmp_kernel_op_235 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8),_mm256_set_pd(tmp_moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9)));
+                const __m256d tmp_kernel_op_238 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_set_pd(tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10),_mm256_set_pd(tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11)));
+                const __m256d tmp_kernel_op_239 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_109,tmp_kernel_op_235),_mm256_mul_pd(tmp_kernel_op_112,tmp_kernel_op_238));
+                const __m256d tmp_kernel_op_240 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_111,tmp_kernel_op_238),_mm256_mul_pd(tmp_kernel_op_121,tmp_kernel_op_235));
+                const __m256d tmp_kernel_op_243 = _mm256_mul_pd(tmp_kernel_op_152,_mm256_add_pd(_mm256_set_pd(tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12),_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)));
+                const __m256d tmp_kernel_op_246 = _mm256_mul_pd(tmp_kernel_op_152,_mm256_add_pd(_mm256_set_pd(tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14),_mm256_set_pd(tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15)));
+                const __m256d tmp_kernel_op_247 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_243),_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_246));
+                const __m256d tmp_kernel_op_248 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_149,tmp_kernel_op_246),_mm256_mul_pd(tmp_kernel_op_159,tmp_kernel_op_243));
+                const __m256d tmp_kernel_op_249 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_120,tmp_kernel_op_239),_mm256_mul_pd(tmp_kernel_op_122,tmp_kernel_op_240))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_158,tmp_kernel_op_247),_mm256_mul_pd(tmp_kernel_op_160,tmp_kernel_op_248)))),_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_223,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_224,tmp_kernel_op_42)))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_231,tmp_kernel_op_82),_mm256_mul_pd(tmp_kernel_op_232,tmp_kernel_op_84))));
+                const __m256d tmp_kernel_op_251 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_0,tmp_moved_constant_0,tmp_moved_constant_0,tmp_moved_constant_0)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE),_mm256_set_pd(tmp_kernel_op_250,tmp_kernel_op_250,tmp_kernel_op_250,tmp_kernel_op_250))));
+                const __m256d tmp_kernel_op_252 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_2,tmp_moved_constant_2,tmp_moved_constant_2,tmp_moved_constant_2)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE),_mm256_set_pd(tmp_kernel_op_250,tmp_kernel_op_250,tmp_kernel_op_250,tmp_kernel_op_250))));
+                const __m256d tmp_kernel_op_253 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_251,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_252,tmp_kernel_op_32));
+                const __m256d tmp_kernel_op_254 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_251,tmp_kernel_op_41),_mm256_mul_pd(tmp_kernel_op_252,tmp_kernel_op_31));
+                const __m256d tmp_kernel_op_256 = _mm256_mul_pd(tmp_kernel_op_76,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_4,tmp_moved_constant_4,tmp_moved_constant_4,tmp_moved_constant_4)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE),_mm256_set_pd(tmp_kernel_op_255,tmp_kernel_op_255,tmp_kernel_op_255,tmp_kernel_op_255))));
+                const __m256d tmp_kernel_op_257 = _mm256_mul_pd(tmp_kernel_op_76,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE),_mm256_set_pd(tmp_kernel_op_255,tmp_kernel_op_255,tmp_kernel_op_255,tmp_kernel_op_255))));
+                const __m256d tmp_kernel_op_258 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_256,tmp_kernel_op_71),_mm256_mul_pd(tmp_kernel_op_257,tmp_kernel_op_74));
+                const __m256d tmp_kernel_op_259 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_256,tmp_kernel_op_83),_mm256_mul_pd(tmp_kernel_op_257,tmp_kernel_op_73));
+                const __m256d tmp_kernel_op_261 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE),_mm256_set_pd(tmp_kernel_op_260,tmp_kernel_op_260,tmp_kernel_op_260,tmp_kernel_op_260))));
+                const __m256d tmp_kernel_op_262 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE),_mm256_set_pd(tmp_kernel_op_260,tmp_kernel_op_260,tmp_kernel_op_260,tmp_kernel_op_260))));
+                const __m256d tmp_kernel_op_263 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_109,tmp_kernel_op_261),_mm256_mul_pd(tmp_kernel_op_112,tmp_kernel_op_262));
+                const __m256d tmp_kernel_op_264 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_111,tmp_kernel_op_262),_mm256_mul_pd(tmp_kernel_op_121,tmp_kernel_op_261));
+                const __m256d tmp_kernel_op_266 = _mm256_mul_pd(tmp_kernel_op_152,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE),_mm256_set_pd(tmp_kernel_op_265,tmp_kernel_op_265,tmp_kernel_op_265,tmp_kernel_op_265))));
+                const __m256d tmp_kernel_op_267 = _mm256_mul_pd(tmp_kernel_op_152,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE),_mm256_set_pd(tmp_kernel_op_265,tmp_kernel_op_265,tmp_kernel_op_265,tmp_kernel_op_265))));
+                const __m256d tmp_kernel_op_268 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_266),_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_267));
+                const __m256d tmp_kernel_op_269 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_149,tmp_kernel_op_267),_mm256_mul_pd(tmp_kernel_op_159,tmp_kernel_op_266));
+                const __m256d tmp_kernel_op_270 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_120,tmp_kernel_op_263),_mm256_mul_pd(tmp_kernel_op_122,tmp_kernel_op_264))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_158,tmp_kernel_op_268),_mm256_mul_pd(tmp_kernel_op_160,tmp_kernel_op_269)))),_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_253,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_254,tmp_kernel_op_42)))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_258,tmp_kernel_op_82),_mm256_mul_pd(tmp_kernel_op_259,tmp_kernel_op_84))));
+                const __m256d tmp_kernel_op_272 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_1,tmp_moved_constant_1,tmp_moved_constant_1,tmp_moved_constant_1)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE),_mm256_set_pd(tmp_kernel_op_271,tmp_kernel_op_271,tmp_kernel_op_271,tmp_kernel_op_271))));
+                const __m256d tmp_kernel_op_273 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE),_mm256_set_pd(tmp_kernel_op_271,tmp_kernel_op_271,tmp_kernel_op_271,tmp_kernel_op_271))));
+                const __m256d tmp_kernel_op_274 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_272,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_273,tmp_kernel_op_32));
+                const __m256d tmp_kernel_op_275 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_272,tmp_kernel_op_41),_mm256_mul_pd(tmp_kernel_op_273,tmp_kernel_op_31));
+                const __m256d tmp_kernel_op_277 = _mm256_mul_pd(tmp_kernel_op_76,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_5,tmp_moved_constant_5,tmp_moved_constant_5,tmp_moved_constant_5)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE),_mm256_set_pd(tmp_kernel_op_276,tmp_kernel_op_276,tmp_kernel_op_276,tmp_kernel_op_276))));
+                const __m256d tmp_kernel_op_278 = _mm256_mul_pd(tmp_kernel_op_76,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE),_mm256_set_pd(tmp_kernel_op_276,tmp_kernel_op_276,tmp_kernel_op_276,tmp_kernel_op_276))));
+                const __m256d tmp_kernel_op_279 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_277,tmp_kernel_op_71),_mm256_mul_pd(tmp_kernel_op_278,tmp_kernel_op_74));
+                const __m256d tmp_kernel_op_280 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_277,tmp_kernel_op_83),_mm256_mul_pd(tmp_kernel_op_278,tmp_kernel_op_73));
+                const __m256d tmp_kernel_op_282 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE),_mm256_set_pd(tmp_kernel_op_281,tmp_kernel_op_281,tmp_kernel_op_281,tmp_kernel_op_281))));
+                const __m256d tmp_kernel_op_283 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE),_mm256_set_pd(tmp_kernel_op_281,tmp_kernel_op_281,tmp_kernel_op_281,tmp_kernel_op_281))));
+                const __m256d tmp_kernel_op_284 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_109,tmp_kernel_op_282),_mm256_mul_pd(tmp_kernel_op_112,tmp_kernel_op_283));
+                const __m256d tmp_kernel_op_285 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_111,tmp_kernel_op_283),_mm256_mul_pd(tmp_kernel_op_121,tmp_kernel_op_282));
+                const __m256d tmp_kernel_op_287 = _mm256_mul_pd(tmp_kernel_op_152,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE),_mm256_set_pd(tmp_kernel_op_286,tmp_kernel_op_286,tmp_kernel_op_286,tmp_kernel_op_286))));
+                const __m256d tmp_kernel_op_288 = _mm256_mul_pd(tmp_kernel_op_152,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE),_mm256_set_pd(tmp_kernel_op_286,tmp_kernel_op_286,tmp_kernel_op_286,tmp_kernel_op_286))));
+                const __m256d tmp_kernel_op_289 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_287),_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_288));
+                const __m256d tmp_kernel_op_290 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_149,tmp_kernel_op_288),_mm256_mul_pd(tmp_kernel_op_159,tmp_kernel_op_287));
+                const __m256d tmp_kernel_op_291 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_120,tmp_kernel_op_284),_mm256_mul_pd(tmp_kernel_op_122,tmp_kernel_op_285))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_158,tmp_kernel_op_289),_mm256_mul_pd(tmp_kernel_op_160,tmp_kernel_op_290)))),_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_274,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_275,tmp_kernel_op_42)))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_279,tmp_kernel_op_82),_mm256_mul_pd(tmp_kernel_op_280,tmp_kernel_op_84))));
+                const __m256d tmp_kernel_op_292 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_178,tmp_kernel_op_199),_mm256_mul_pd(tmp_kernel_op_179,tmp_kernel_op_200))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_183,tmp_kernel_op_204),_mm256_mul_pd(tmp_kernel_op_184,tmp_kernel_op_205)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_188,tmp_kernel_op_209),_mm256_mul_pd(tmp_kernel_op_189,tmp_kernel_op_210)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_193,tmp_kernel_op_214),_mm256_mul_pd(tmp_kernel_op_194,tmp_kernel_op_215))));
+                const __m256d tmp_kernel_op_293 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_178,tmp_kernel_op_223),_mm256_mul_pd(tmp_kernel_op_179,tmp_kernel_op_224))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_183,tmp_kernel_op_231),_mm256_mul_pd(tmp_kernel_op_184,tmp_kernel_op_232)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_188,tmp_kernel_op_239),_mm256_mul_pd(tmp_kernel_op_189,tmp_kernel_op_240)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_193,tmp_kernel_op_247),_mm256_mul_pd(tmp_kernel_op_194,tmp_kernel_op_248))));
+                const __m256d tmp_kernel_op_294 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_178,tmp_kernel_op_253),_mm256_mul_pd(tmp_kernel_op_179,tmp_kernel_op_254))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_183,tmp_kernel_op_258),_mm256_mul_pd(tmp_kernel_op_184,tmp_kernel_op_259)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_188,tmp_kernel_op_263),_mm256_mul_pd(tmp_kernel_op_189,tmp_kernel_op_264)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_193,tmp_kernel_op_268),_mm256_mul_pd(tmp_kernel_op_194,tmp_kernel_op_269))));
+                const __m256d tmp_kernel_op_295 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_178,tmp_kernel_op_274),_mm256_mul_pd(tmp_kernel_op_179,tmp_kernel_op_275))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_183,tmp_kernel_op_279),_mm256_mul_pd(tmp_kernel_op_184,tmp_kernel_op_280)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_188,tmp_kernel_op_284),_mm256_mul_pd(tmp_kernel_op_189,tmp_kernel_op_285)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_193,tmp_kernel_op_289),_mm256_mul_pd(tmp_kernel_op_194,tmp_kernel_op_290))));
+                const __m256d tmp_kernel_op_296 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_199,tmp_kernel_op_223),_mm256_mul_pd(tmp_kernel_op_200,tmp_kernel_op_224))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_204,tmp_kernel_op_231),_mm256_mul_pd(tmp_kernel_op_205,tmp_kernel_op_232)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_209,tmp_kernel_op_239),_mm256_mul_pd(tmp_kernel_op_210,tmp_kernel_op_240)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_214,tmp_kernel_op_247),_mm256_mul_pd(tmp_kernel_op_215,tmp_kernel_op_248))));
+                const __m256d tmp_kernel_op_297 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_199,tmp_kernel_op_253),_mm256_mul_pd(tmp_kernel_op_200,tmp_kernel_op_254))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_204,tmp_kernel_op_258),_mm256_mul_pd(tmp_kernel_op_205,tmp_kernel_op_259)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_209,tmp_kernel_op_263),_mm256_mul_pd(tmp_kernel_op_210,tmp_kernel_op_264)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_214,tmp_kernel_op_268),_mm256_mul_pd(tmp_kernel_op_215,tmp_kernel_op_269))));
+                const __m256d tmp_kernel_op_298 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_199,tmp_kernel_op_274),_mm256_mul_pd(tmp_kernel_op_200,tmp_kernel_op_275))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_204,tmp_kernel_op_279),_mm256_mul_pd(tmp_kernel_op_205,tmp_kernel_op_280)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_209,tmp_kernel_op_284),_mm256_mul_pd(tmp_kernel_op_210,tmp_kernel_op_285)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_214,tmp_kernel_op_289),_mm256_mul_pd(tmp_kernel_op_215,tmp_kernel_op_290))));
+                const __m256d tmp_kernel_op_299 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_223,tmp_kernel_op_253),_mm256_mul_pd(tmp_kernel_op_224,tmp_kernel_op_254))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_231,tmp_kernel_op_258),_mm256_mul_pd(tmp_kernel_op_232,tmp_kernel_op_259)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_239,tmp_kernel_op_263),_mm256_mul_pd(tmp_kernel_op_240,tmp_kernel_op_264)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_247,tmp_kernel_op_268),_mm256_mul_pd(tmp_kernel_op_248,tmp_kernel_op_269))));
+                const __m256d tmp_kernel_op_300 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_223,tmp_kernel_op_274),_mm256_mul_pd(tmp_kernel_op_224,tmp_kernel_op_275))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_231,tmp_kernel_op_279),_mm256_mul_pd(tmp_kernel_op_232,tmp_kernel_op_280)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_239,tmp_kernel_op_284),_mm256_mul_pd(tmp_kernel_op_240,tmp_kernel_op_285)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_247,tmp_kernel_op_289),_mm256_mul_pd(tmp_kernel_op_248,tmp_kernel_op_290))));
+                const __m256d tmp_kernel_op_301 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_253,tmp_kernel_op_274),_mm256_mul_pd(tmp_kernel_op_254,tmp_kernel_op_275))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_258,tmp_kernel_op_279),_mm256_mul_pd(tmp_kernel_op_259,tmp_kernel_op_280)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_263,tmp_kernel_op_284),_mm256_mul_pd(tmp_kernel_op_264,tmp_kernel_op_285)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_268,tmp_kernel_op_289),_mm256_mul_pd(tmp_kernel_op_269,tmp_kernel_op_290))));
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_120,tmp_kernel_op_120),_mm256_mul_pd(tmp_kernel_op_122,tmp_kernel_op_122))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_158,tmp_kernel_op_158),_mm256_mul_pd(tmp_kernel_op_160,tmp_kernel_op_160)))),_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_40,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_42,tmp_kernel_op_42)))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_82,tmp_kernel_op_82),_mm256_mul_pd(tmp_kernel_op_84,tmp_kernel_op_84))))),_mm256_mul_pd(src_dof_1,tmp_kernel_op_195)),_mm256_mul_pd(src_dof_2,tmp_kernel_op_216)),_mm256_mul_pd(src_dof_3,tmp_kernel_op_249)),_mm256_mul_pd(src_dof_4,tmp_kernel_op_270)),_mm256_mul_pd(src_dof_5,tmp_kernel_op_291));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_178,tmp_kernel_op_178),_mm256_mul_pd(tmp_kernel_op_179,tmp_kernel_op_179))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_183,tmp_kernel_op_183),_mm256_mul_pd(tmp_kernel_op_184,tmp_kernel_op_184)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_188,tmp_kernel_op_188),_mm256_mul_pd(tmp_kernel_op_189,tmp_kernel_op_189)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_193,tmp_kernel_op_193),_mm256_mul_pd(tmp_kernel_op_194,tmp_kernel_op_194))))),_mm256_mul_pd(src_dof_0,tmp_kernel_op_195)),_mm256_mul_pd(src_dof_2,tmp_kernel_op_292)),_mm256_mul_pd(src_dof_3,tmp_kernel_op_293)),_mm256_mul_pd(src_dof_4,tmp_kernel_op_294)),_mm256_mul_pd(src_dof_5,tmp_kernel_op_295));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_199,tmp_kernel_op_199),_mm256_mul_pd(tmp_kernel_op_200,tmp_kernel_op_200))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_204,tmp_kernel_op_204),_mm256_mul_pd(tmp_kernel_op_205,tmp_kernel_op_205)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_209,tmp_kernel_op_209),_mm256_mul_pd(tmp_kernel_op_210,tmp_kernel_op_210)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_214,tmp_kernel_op_214),_mm256_mul_pd(tmp_kernel_op_215,tmp_kernel_op_215))))),_mm256_mul_pd(src_dof_0,tmp_kernel_op_216)),_mm256_mul_pd(src_dof_1,tmp_kernel_op_292)),_mm256_mul_pd(src_dof_3,tmp_kernel_op_296)),_mm256_mul_pd(src_dof_4,tmp_kernel_op_297)),_mm256_mul_pd(src_dof_5,tmp_kernel_op_298));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_3,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_223,tmp_kernel_op_223),_mm256_mul_pd(tmp_kernel_op_224,tmp_kernel_op_224))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_231,tmp_kernel_op_231),_mm256_mul_pd(tmp_kernel_op_232,tmp_kernel_op_232)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_239,tmp_kernel_op_239),_mm256_mul_pd(tmp_kernel_op_240,tmp_kernel_op_240)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_247,tmp_kernel_op_247),_mm256_mul_pd(tmp_kernel_op_248,tmp_kernel_op_248))))),_mm256_mul_pd(src_dof_0,tmp_kernel_op_249)),_mm256_mul_pd(src_dof_1,tmp_kernel_op_293)),_mm256_mul_pd(src_dof_2,tmp_kernel_op_296)),_mm256_mul_pd(src_dof_4,tmp_kernel_op_299)),_mm256_mul_pd(src_dof_5,tmp_kernel_op_300));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_253,tmp_kernel_op_253),_mm256_mul_pd(tmp_kernel_op_254,tmp_kernel_op_254))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_258,tmp_kernel_op_258),_mm256_mul_pd(tmp_kernel_op_259,tmp_kernel_op_259)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_263,tmp_kernel_op_263),_mm256_mul_pd(tmp_kernel_op_264,tmp_kernel_op_264)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_268,tmp_kernel_op_268),_mm256_mul_pd(tmp_kernel_op_269,tmp_kernel_op_269))))),_mm256_mul_pd(src_dof_0,tmp_kernel_op_270)),_mm256_mul_pd(src_dof_1,tmp_kernel_op_294)),_mm256_mul_pd(src_dof_2,tmp_kernel_op_297)),_mm256_mul_pd(src_dof_3,tmp_kernel_op_299)),_mm256_mul_pd(src_dof_5,tmp_kernel_op_301));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_60,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_274,tmp_kernel_op_274),_mm256_mul_pd(tmp_kernel_op_275,tmp_kernel_op_275))),_mm256_mul_pd(tmp_kernel_op_98,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_279,tmp_kernel_op_279),_mm256_mul_pd(tmp_kernel_op_280,tmp_kernel_op_280)))),_mm256_mul_pd(tmp_kernel_op_136,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_284,tmp_kernel_op_284),_mm256_mul_pd(tmp_kernel_op_285,tmp_kernel_op_285)))),_mm256_mul_pd(tmp_kernel_op_174,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_289,tmp_kernel_op_289),_mm256_mul_pd(tmp_kernel_op_290,tmp_kernel_op_290))))),_mm256_mul_pd(src_dof_0,tmp_kernel_op_291)),_mm256_mul_pd(src_dof_1,tmp_kernel_op_295)),_mm256_mul_pd(src_dof_2,tmp_kernel_op_298)),_mm256_mul_pd(src_dof_3,tmp_kernel_op_300)),_mm256_mul_pd(src_dof_4,tmp_kernel_op_301));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t tmp_kernel_op_2 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_kernel_op_3 = -tmp_kernel_op_2;
+                const real_t tmp_kernel_op_4 = p_affine_0_0 - p_affine_2_0;
+                const real_t tmp_kernel_op_5 = -tmp_kernel_op_4;
+                const real_t tmp_kernel_op_6 = p_affine_0_0 + tmp_kernel_op_3*0.33333333333333331 + tmp_kernel_op_5*0.33333333333333331;
+                const real_t tmp_kernel_op_7 = (tmp_kernel_op_6*tmp_kernel_op_6);
+                const real_t tmp_kernel_op_8 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_kernel_op_9 = -tmp_kernel_op_8;
+                const real_t tmp_kernel_op_10 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_kernel_op_11 = -tmp_kernel_op_10;
+                const real_t tmp_kernel_op_12 = p_affine_0_1 + tmp_kernel_op_11*0.33333333333333331 + tmp_kernel_op_9*0.33333333333333331;
+                const real_t tmp_kernel_op_13 = (tmp_kernel_op_12*tmp_kernel_op_12);
+                const real_t tmp_kernel_op_14 = tmp_kernel_op_13 + tmp_kernel_op_7;
+                const real_t tmp_kernel_op_22 = pow(tmp_kernel_op_14, -0.50000000000000000)*tmp_kernel_op_21;
+                const real_t tmp_kernel_op_23 = tmp_kernel_op_22*tmp_kernel_op_6;
+                const real_t tmp_kernel_op_24 = pow(tmp_kernel_op_14, -1.5000000000000000);
+                const real_t tmp_kernel_op_27 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_6) - tmp_kernel_op_17*(tmp_kernel_op_12 + tmp_kernel_op_25));
+                const real_t tmp_kernel_op_28 = tmp_kernel_op_24*tmp_kernel_op_27*1.0;
+                const real_t tmp_kernel_op_29 = tmp_kernel_op_1*tmp_kernel_op_23 + tmp_kernel_op_13*tmp_kernel_op_28;
+                const real_t tmp_kernel_op_30 = tmp_kernel_op_12*tmp_kernel_op_22;
+                const real_t tmp_kernel_op_31 = -tmp_kernel_op_17*tmp_kernel_op_30 + tmp_kernel_op_24*tmp_kernel_op_27*tmp_kernel_op_7*1.0;
+                const real_t tmp_kernel_op_32 = tmp_kernel_op_12*tmp_kernel_op_24*tmp_kernel_op_27*tmp_kernel_op_6*1.0 + tmp_kernel_op_17*tmp_kernel_op_23;
+                const real_t tmp_kernel_op_33 = tmp_kernel_op_1*tmp_kernel_op_30 - tmp_kernel_op_12*tmp_kernel_op_28*tmp_kernel_op_6;
+                const real_t tmp_kernel_op_34 = 1.0 / (tmp_kernel_op_29*tmp_kernel_op_31 + tmp_kernel_op_32*tmp_kernel_op_33);
+                const real_t tmp_kernel_op_38 = tmp_kernel_op_34*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_37 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_37);
+                const real_t tmp_kernel_op_39 = tmp_kernel_op_34*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_37 + jac_affine_inv_1_0_BLUE*tmp_kernel_op_37);
+                const real_t tmp_kernel_op_40 = tmp_kernel_op_29*tmp_kernel_op_38 + tmp_kernel_op_32*tmp_kernel_op_39;
+                const real_t tmp_kernel_op_41 = -tmp_kernel_op_33;
+                const real_t tmp_kernel_op_42 = tmp_kernel_op_31*tmp_kernel_op_39 + tmp_kernel_op_38*tmp_kernel_op_41;
+                const real_t tmp_kernel_op_43 = -p_affine_0_0;
+                const real_t tmp_kernel_op_44 = tmp_kernel_op_2*0.33333333333333331 + tmp_kernel_op_4*0.33333333333333331 + tmp_kernel_op_43;
+                const real_t tmp_kernel_op_45 = (tmp_kernel_op_44*tmp_kernel_op_44);
+                const real_t tmp_kernel_op_46 = -p_affine_0_1;
+                const real_t tmp_kernel_op_47 = tmp_kernel_op_10*0.33333333333333331 + tmp_kernel_op_46 + tmp_kernel_op_8*0.33333333333333331;
+                const real_t tmp_kernel_op_48 = (tmp_kernel_op_47*tmp_kernel_op_47);
+                const real_t tmp_kernel_op_49 = tmp_kernel_op_45 + tmp_kernel_op_48;
+                const real_t tmp_kernel_op_52 = pow(tmp_kernel_op_49, -0.50000000000000000)*tmp_kernel_op_51;
+                const real_t tmp_kernel_op_53 = tmp_kernel_op_44*tmp_kernel_op_52;
+                const real_t tmp_kernel_op_54 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_44) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_47);
+                const real_t tmp_kernel_op_55 = pow(tmp_kernel_op_49, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_56 = tmp_kernel_op_55*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_54);
+                const real_t tmp_kernel_op_57 = tmp_kernel_op_47*tmp_kernel_op_52;
+                const real_t tmp_kernel_op_58 = tmp_kernel_op_55*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_54);
+                const real_t tmp_kernel_op_59 = tmp_kernel_op_44*tmp_kernel_op_47;
+                const real_t tmp_kernel_op_60 = abs_det_jac_affine_BLUE*-0.28125*abs((tmp_kernel_op_0*tmp_kernel_op_53 - tmp_kernel_op_48*tmp_kernel_op_56)*(tmp_kernel_op_16*tmp_kernel_op_57 + tmp_kernel_op_45*tmp_kernel_op_58) - (tmp_kernel_op_0*tmp_kernel_op_57 + tmp_kernel_op_56*tmp_kernel_op_59)*(tmp_kernel_op_16*tmp_kernel_op_53 - tmp_kernel_op_58*tmp_kernel_op_59));
+                const real_t tmp_kernel_op_61 = p_affine_0_0 + tmp_kernel_op_3*0.20000000000000001 + tmp_kernel_op_5*0.59999999999999998;
+                const real_t tmp_kernel_op_62 = (tmp_kernel_op_61*tmp_kernel_op_61);
+                const real_t tmp_kernel_op_63 = p_affine_0_1 + tmp_kernel_op_11*0.59999999999999998 + tmp_kernel_op_9*0.20000000000000001;
+                const real_t tmp_kernel_op_64 = (tmp_kernel_op_63*tmp_kernel_op_63);
+                const real_t tmp_kernel_op_65 = tmp_kernel_op_62 + tmp_kernel_op_64;
+                const real_t tmp_kernel_op_66 = tmp_kernel_op_21*pow(tmp_kernel_op_65, -0.50000000000000000);
+                const real_t tmp_kernel_op_67 = tmp_kernel_op_61*tmp_kernel_op_66;
+                const real_t tmp_kernel_op_68 = pow(tmp_kernel_op_65, -1.5000000000000000);
+                const real_t tmp_kernel_op_69 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_61) - tmp_kernel_op_17*(tmp_kernel_op_25 + tmp_kernel_op_63));
+                const real_t tmp_kernel_op_70 = tmp_kernel_op_68*tmp_kernel_op_69*1.0;
+                const real_t tmp_kernel_op_71 = tmp_kernel_op_1*tmp_kernel_op_67 + tmp_kernel_op_64*tmp_kernel_op_70;
+                const real_t tmp_kernel_op_72 = tmp_kernel_op_63*tmp_kernel_op_66;
+                const real_t tmp_kernel_op_73 = -tmp_kernel_op_17*tmp_kernel_op_72 + tmp_kernel_op_62*tmp_kernel_op_68*tmp_kernel_op_69*1.0;
+                const real_t tmp_kernel_op_74 = tmp_kernel_op_17*tmp_kernel_op_67 + tmp_kernel_op_61*tmp_kernel_op_63*tmp_kernel_op_68*tmp_kernel_op_69*1.0;
+                const real_t tmp_kernel_op_75 = tmp_kernel_op_1*tmp_kernel_op_72 - tmp_kernel_op_61*tmp_kernel_op_63*tmp_kernel_op_70;
+                const real_t tmp_kernel_op_76 = 1.0 / (tmp_kernel_op_71*tmp_kernel_op_73 + tmp_kernel_op_74*tmp_kernel_op_75);
+                const real_t tmp_kernel_op_80 = tmp_kernel_op_76*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_79 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_79);
+                const real_t tmp_kernel_op_81 = tmp_kernel_op_76*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_79 + jac_affine_inv_1_0_BLUE*tmp_kernel_op_79);
+                const real_t tmp_kernel_op_82 = tmp_kernel_op_71*tmp_kernel_op_80 + tmp_kernel_op_74*tmp_kernel_op_81;
+                const real_t tmp_kernel_op_83 = -tmp_kernel_op_75;
+                const real_t tmp_kernel_op_84 = tmp_kernel_op_73*tmp_kernel_op_81 + tmp_kernel_op_80*tmp_kernel_op_83;
+                const real_t tmp_kernel_op_85 = tmp_kernel_op_2*0.20000000000000001 + tmp_kernel_op_4*0.59999999999999998 + tmp_kernel_op_43;
+                const real_t tmp_kernel_op_86 = (tmp_kernel_op_85*tmp_kernel_op_85);
+                const real_t tmp_kernel_op_87 = tmp_kernel_op_10*0.59999999999999998 + tmp_kernel_op_46 + tmp_kernel_op_8*0.20000000000000001;
+                const real_t tmp_kernel_op_88 = (tmp_kernel_op_87*tmp_kernel_op_87);
+                const real_t tmp_kernel_op_89 = tmp_kernel_op_86 + tmp_kernel_op_88;
+                const real_t tmp_kernel_op_90 = tmp_kernel_op_51*pow(tmp_kernel_op_89, -0.50000000000000000);
+                const real_t tmp_kernel_op_91 = tmp_kernel_op_85*tmp_kernel_op_90;
+                const real_t tmp_kernel_op_92 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_85) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_87);
+                const real_t tmp_kernel_op_93 = pow(tmp_kernel_op_89, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_94 = tmp_kernel_op_93*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_92);
+                const real_t tmp_kernel_op_95 = tmp_kernel_op_87*tmp_kernel_op_90;
+                const real_t tmp_kernel_op_96 = tmp_kernel_op_93*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_92);
+                const real_t tmp_kernel_op_97 = tmp_kernel_op_85*tmp_kernel_op_87;
+                const real_t tmp_kernel_op_98 = abs_det_jac_affine_BLUE*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_91 - tmp_kernel_op_88*tmp_kernel_op_94)*(tmp_kernel_op_16*tmp_kernel_op_95 + tmp_kernel_op_86*tmp_kernel_op_96) - (tmp_kernel_op_0*tmp_kernel_op_95 + tmp_kernel_op_94*tmp_kernel_op_97)*(tmp_kernel_op_16*tmp_kernel_op_91 - tmp_kernel_op_96*tmp_kernel_op_97));
+                const real_t tmp_kernel_op_99 = p_affine_0_0 + tmp_kernel_op_3*0.59999999999999998 + tmp_kernel_op_5*0.20000000000000001;
+                const real_t tmp_kernel_op_100 = (tmp_kernel_op_99*tmp_kernel_op_99);
+                const real_t tmp_kernel_op_101 = p_affine_0_1 + tmp_kernel_op_11*0.20000000000000001 + tmp_kernel_op_9*0.59999999999999998;
+                const real_t tmp_kernel_op_102 = (tmp_kernel_op_101*tmp_kernel_op_101);
+                const real_t tmp_kernel_op_103 = tmp_kernel_op_100 + tmp_kernel_op_102;
+                const real_t tmp_kernel_op_104 = pow(tmp_kernel_op_103, -0.50000000000000000)*tmp_kernel_op_21;
+                const real_t tmp_kernel_op_105 = tmp_kernel_op_104*tmp_kernel_op_99;
+                const real_t tmp_kernel_op_106 = pow(tmp_kernel_op_103, -1.5000000000000000);
+                const real_t tmp_kernel_op_107 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_99) - tmp_kernel_op_17*(tmp_kernel_op_101 + tmp_kernel_op_25));
+                const real_t tmp_kernel_op_108 = tmp_kernel_op_106*tmp_kernel_op_107*1.0;
+                const real_t tmp_kernel_op_109 = tmp_kernel_op_1*tmp_kernel_op_105 + tmp_kernel_op_102*tmp_kernel_op_108;
+                const real_t tmp_kernel_op_110 = tmp_kernel_op_101*tmp_kernel_op_104;
+                const real_t tmp_kernel_op_111 = tmp_kernel_op_100*tmp_kernel_op_106*tmp_kernel_op_107*1.0 - tmp_kernel_op_110*tmp_kernel_op_17;
+                const real_t tmp_kernel_op_112 = tmp_kernel_op_101*tmp_kernel_op_106*tmp_kernel_op_107*tmp_kernel_op_99*1.0 + tmp_kernel_op_105*tmp_kernel_op_17;
+                const real_t tmp_kernel_op_113 = tmp_kernel_op_1*tmp_kernel_op_110 - tmp_kernel_op_101*tmp_kernel_op_108*tmp_kernel_op_99;
+                const real_t tmp_kernel_op_114 = 1.0 / (tmp_kernel_op_109*tmp_kernel_op_111 + tmp_kernel_op_112*tmp_kernel_op_113);
+                const real_t tmp_kernel_op_118 = tmp_kernel_op_114*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_117 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_117);
+                const real_t tmp_kernel_op_119 = tmp_kernel_op_114*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_117 + jac_affine_inv_1_0_BLUE*tmp_kernel_op_117);
+                const real_t tmp_kernel_op_120 = tmp_kernel_op_109*tmp_kernel_op_118 + tmp_kernel_op_112*tmp_kernel_op_119;
+                const real_t tmp_kernel_op_121 = -tmp_kernel_op_113;
+                const real_t tmp_kernel_op_122 = tmp_kernel_op_111*tmp_kernel_op_119 + tmp_kernel_op_118*tmp_kernel_op_121;
+                const real_t tmp_kernel_op_123 = tmp_kernel_op_2*0.59999999999999998 + tmp_kernel_op_4*0.20000000000000001 + tmp_kernel_op_43;
+                const real_t tmp_kernel_op_124 = (tmp_kernel_op_123*tmp_kernel_op_123);
+                const real_t tmp_kernel_op_125 = tmp_kernel_op_10*0.20000000000000001 + tmp_kernel_op_46 + tmp_kernel_op_8*0.59999999999999998;
+                const real_t tmp_kernel_op_126 = (tmp_kernel_op_125*tmp_kernel_op_125);
+                const real_t tmp_kernel_op_127 = tmp_kernel_op_124 + tmp_kernel_op_126;
+                const real_t tmp_kernel_op_128 = pow(tmp_kernel_op_127, -0.50000000000000000)*tmp_kernel_op_51;
+                const real_t tmp_kernel_op_129 = tmp_kernel_op_123*tmp_kernel_op_128;
+                const real_t tmp_kernel_op_130 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_123) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_125);
+                const real_t tmp_kernel_op_131 = pow(tmp_kernel_op_127, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_132 = tmp_kernel_op_131*(radRayVertex + tmp_kernel_op_130*tmp_kernel_op_50);
+                const real_t tmp_kernel_op_133 = tmp_kernel_op_125*tmp_kernel_op_128;
+                const real_t tmp_kernel_op_134 = tmp_kernel_op_131*(radRayVertex + tmp_kernel_op_130*tmp_kernel_op_50);
+                const real_t tmp_kernel_op_135 = tmp_kernel_op_123*tmp_kernel_op_125;
+                const real_t tmp_kernel_op_136 = abs_det_jac_affine_BLUE*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_129 - tmp_kernel_op_126*tmp_kernel_op_132)*(tmp_kernel_op_124*tmp_kernel_op_134 + tmp_kernel_op_133*tmp_kernel_op_16) - (tmp_kernel_op_0*tmp_kernel_op_133 + tmp_kernel_op_132*tmp_kernel_op_135)*(tmp_kernel_op_129*tmp_kernel_op_16 - tmp_kernel_op_134*tmp_kernel_op_135));
+                const real_t tmp_kernel_op_137 = p_affine_0_0 + tmp_kernel_op_3*0.20000000000000001 + tmp_kernel_op_5*0.20000000000000001;
+                const real_t tmp_kernel_op_138 = (tmp_kernel_op_137*tmp_kernel_op_137);
+                const real_t tmp_kernel_op_139 = p_affine_0_1 + tmp_kernel_op_11*0.20000000000000001 + tmp_kernel_op_9*0.20000000000000001;
+                const real_t tmp_kernel_op_140 = (tmp_kernel_op_139*tmp_kernel_op_139);
+                const real_t tmp_kernel_op_141 = tmp_kernel_op_138 + tmp_kernel_op_140;
+                const real_t tmp_kernel_op_142 = pow(tmp_kernel_op_141, -0.50000000000000000)*tmp_kernel_op_21;
+                const real_t tmp_kernel_op_143 = tmp_kernel_op_137*tmp_kernel_op_142;
+                const real_t tmp_kernel_op_144 = pow(tmp_kernel_op_141, -1.5000000000000000);
+                const real_t tmp_kernel_op_145 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_137 + tmp_kernel_op_26) - tmp_kernel_op_17*(tmp_kernel_op_139 + tmp_kernel_op_25));
+                const real_t tmp_kernel_op_146 = tmp_kernel_op_144*tmp_kernel_op_145*1.0;
+                const real_t tmp_kernel_op_147 = tmp_kernel_op_1*tmp_kernel_op_143 + tmp_kernel_op_140*tmp_kernel_op_146;
+                const real_t tmp_kernel_op_148 = tmp_kernel_op_139*tmp_kernel_op_142;
+                const real_t tmp_kernel_op_149 = tmp_kernel_op_138*tmp_kernel_op_144*tmp_kernel_op_145*1.0 - tmp_kernel_op_148*tmp_kernel_op_17;
+                const real_t tmp_kernel_op_150 = tmp_kernel_op_137*tmp_kernel_op_139*tmp_kernel_op_144*tmp_kernel_op_145*1.0 + tmp_kernel_op_143*tmp_kernel_op_17;
+                const real_t tmp_kernel_op_151 = tmp_kernel_op_1*tmp_kernel_op_148 - tmp_kernel_op_137*tmp_kernel_op_139*tmp_kernel_op_146;
+                const real_t tmp_kernel_op_152 = 1.0 / (tmp_kernel_op_147*tmp_kernel_op_149 + tmp_kernel_op_150*tmp_kernel_op_151);
+                const real_t tmp_kernel_op_156 = tmp_kernel_op_152*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_155 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_155);
+                const real_t tmp_kernel_op_157 = tmp_kernel_op_152*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_155 + jac_affine_inv_1_0_BLUE*tmp_kernel_op_155);
+                const real_t tmp_kernel_op_158 = tmp_kernel_op_147*tmp_kernel_op_156 + tmp_kernel_op_150*tmp_kernel_op_157;
+                const real_t tmp_kernel_op_159 = -tmp_kernel_op_151;
+                const real_t tmp_kernel_op_160 = tmp_kernel_op_149*tmp_kernel_op_157 + tmp_kernel_op_156*tmp_kernel_op_159;
+                const real_t tmp_kernel_op_161 = tmp_kernel_op_2*0.20000000000000001 + tmp_kernel_op_4*0.20000000000000001 + tmp_kernel_op_43;
+                const real_t tmp_kernel_op_162 = (tmp_kernel_op_161*tmp_kernel_op_161);
+                const real_t tmp_kernel_op_163 = tmp_kernel_op_10*0.20000000000000001 + tmp_kernel_op_46 + tmp_kernel_op_8*0.20000000000000001;
+                const real_t tmp_kernel_op_164 = (tmp_kernel_op_163*tmp_kernel_op_163);
+                const real_t tmp_kernel_op_165 = tmp_kernel_op_162 + tmp_kernel_op_164;
+                const real_t tmp_kernel_op_166 = pow(tmp_kernel_op_165, -0.50000000000000000)*tmp_kernel_op_51;
+                const real_t tmp_kernel_op_167 = tmp_kernel_op_161*tmp_kernel_op_166;
+                const real_t tmp_kernel_op_168 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_161) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_163);
+                const real_t tmp_kernel_op_169 = pow(tmp_kernel_op_165, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_170 = tmp_kernel_op_169*(radRayVertex + tmp_kernel_op_168*tmp_kernel_op_50);
+                const real_t tmp_kernel_op_171 = tmp_kernel_op_163*tmp_kernel_op_166;
+                const real_t tmp_kernel_op_172 = tmp_kernel_op_169*(radRayVertex + tmp_kernel_op_168*tmp_kernel_op_50);
+                const real_t tmp_kernel_op_173 = tmp_kernel_op_161*tmp_kernel_op_163;
+                const real_t tmp_kernel_op_174 = abs_det_jac_affine_BLUE*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_167 - tmp_kernel_op_164*tmp_kernel_op_170)*(tmp_kernel_op_16*tmp_kernel_op_171 + tmp_kernel_op_162*tmp_kernel_op_172) - (tmp_kernel_op_0*tmp_kernel_op_171 + tmp_kernel_op_170*tmp_kernel_op_173)*(tmp_kernel_op_16*tmp_kernel_op_167 - tmp_kernel_op_172*tmp_kernel_op_173));
+                const real_t tmp_kernel_op_175 = tmp_kernel_op_34*(tmp_kernel_op_35 - 1.0);
+                const real_t tmp_kernel_op_176 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_175;
+                const real_t tmp_kernel_op_177 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_175;
+                const real_t tmp_kernel_op_178 = tmp_kernel_op_176*tmp_kernel_op_29 + tmp_kernel_op_177*tmp_kernel_op_32;
+                const real_t tmp_kernel_op_179 = tmp_kernel_op_176*tmp_kernel_op_41 + tmp_kernel_op_177*tmp_kernel_op_31;
+                const real_t tmp_kernel_op_180 = tmp_kernel_op_76*(tmp_kernel_op_77 - 1.0);
+                const real_t tmp_kernel_op_181 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_180;
+                const real_t tmp_kernel_op_182 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_180;
+                const real_t tmp_kernel_op_183 = tmp_kernel_op_181*tmp_kernel_op_71 + tmp_kernel_op_182*tmp_kernel_op_74;
+                const real_t tmp_kernel_op_184 = tmp_kernel_op_181*tmp_kernel_op_83 + tmp_kernel_op_182*tmp_kernel_op_73;
+                const real_t tmp_kernel_op_185 = tmp_kernel_op_114*(tmp_kernel_op_115 - 1.0);
+                const real_t tmp_kernel_op_186 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_185;
+                const real_t tmp_kernel_op_187 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_185;
+                const real_t tmp_kernel_op_188 = tmp_kernel_op_109*tmp_kernel_op_186 + tmp_kernel_op_112*tmp_kernel_op_187;
+                const real_t tmp_kernel_op_189 = tmp_kernel_op_111*tmp_kernel_op_187 + tmp_kernel_op_121*tmp_kernel_op_186;
+                const real_t tmp_kernel_op_190 = tmp_kernel_op_152*(tmp_kernel_op_153 - 1.0);
+                const real_t tmp_kernel_op_191 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_190;
+                const real_t tmp_kernel_op_192 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_190;
+                const real_t tmp_kernel_op_193 = tmp_kernel_op_147*tmp_kernel_op_191 + tmp_kernel_op_150*tmp_kernel_op_192;
+                const real_t tmp_kernel_op_194 = tmp_kernel_op_149*tmp_kernel_op_192 + tmp_kernel_op_159*tmp_kernel_op_191;
+                const real_t tmp_kernel_op_195 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_188 + tmp_kernel_op_122*tmp_kernel_op_189) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_193 + tmp_kernel_op_160*tmp_kernel_op_194) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_40 + tmp_kernel_op_179*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_82 + tmp_kernel_op_184*tmp_kernel_op_84);
+                const real_t tmp_kernel_op_196 = tmp_kernel_op_34*(tmp_kernel_op_36 - 1.0);
+                const real_t tmp_kernel_op_197 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_196;
+                const real_t tmp_kernel_op_198 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_196;
+                const real_t tmp_kernel_op_199 = tmp_kernel_op_197*tmp_kernel_op_29 + tmp_kernel_op_198*tmp_kernel_op_32;
+                const real_t tmp_kernel_op_200 = tmp_kernel_op_197*tmp_kernel_op_41 + tmp_kernel_op_198*tmp_kernel_op_31;
+                const real_t tmp_kernel_op_201 = tmp_kernel_op_76*(tmp_kernel_op_78 - 1.0);
+                const real_t tmp_kernel_op_202 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_201;
+                const real_t tmp_kernel_op_203 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_201;
+                const real_t tmp_kernel_op_204 = tmp_kernel_op_202*tmp_kernel_op_71 + tmp_kernel_op_203*tmp_kernel_op_74;
+                const real_t tmp_kernel_op_205 = tmp_kernel_op_202*tmp_kernel_op_83 + tmp_kernel_op_203*tmp_kernel_op_73;
+                const real_t tmp_kernel_op_206 = tmp_kernel_op_114*(tmp_kernel_op_116 - 1.0);
+                const real_t tmp_kernel_op_207 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_206;
+                const real_t tmp_kernel_op_208 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_206;
+                const real_t tmp_kernel_op_209 = tmp_kernel_op_109*tmp_kernel_op_207 + tmp_kernel_op_112*tmp_kernel_op_208;
+                const real_t tmp_kernel_op_210 = tmp_kernel_op_111*tmp_kernel_op_208 + tmp_kernel_op_121*tmp_kernel_op_207;
+                const real_t tmp_kernel_op_211 = tmp_kernel_op_152*(tmp_kernel_op_154 - 1.0);
+                const real_t tmp_kernel_op_212 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_211;
+                const real_t tmp_kernel_op_213 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_211;
+                const real_t tmp_kernel_op_214 = tmp_kernel_op_147*tmp_kernel_op_212 + tmp_kernel_op_150*tmp_kernel_op_213;
+                const real_t tmp_kernel_op_215 = tmp_kernel_op_149*tmp_kernel_op_213 + tmp_kernel_op_159*tmp_kernel_op_212;
+                const real_t tmp_kernel_op_216 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_209 + tmp_kernel_op_122*tmp_kernel_op_210) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_214 + tmp_kernel_op_160*tmp_kernel_op_215) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_40 + tmp_kernel_op_200*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_82 + tmp_kernel_op_205*tmp_kernel_op_84);
+                const real_t tmp_kernel_op_219 = tmp_kernel_op_34*(tmp_moved_constant_0 + tmp_moved_constant_1);
+                const real_t tmp_kernel_op_222 = tmp_kernel_op_34*(tmp_moved_constant_2 + tmp_moved_constant_3);
+                const real_t tmp_kernel_op_223 = tmp_kernel_op_219*tmp_kernel_op_29 + tmp_kernel_op_222*tmp_kernel_op_32;
+                const real_t tmp_kernel_op_224 = tmp_kernel_op_219*tmp_kernel_op_41 + tmp_kernel_op_222*tmp_kernel_op_31;
+                const real_t tmp_kernel_op_227 = tmp_kernel_op_76*(tmp_moved_constant_4 + tmp_moved_constant_5);
+                const real_t tmp_kernel_op_230 = tmp_kernel_op_76*(tmp_moved_constant_6 + tmp_moved_constant_7);
+                const real_t tmp_kernel_op_231 = tmp_kernel_op_227*tmp_kernel_op_71 + tmp_kernel_op_230*tmp_kernel_op_74;
+                const real_t tmp_kernel_op_232 = tmp_kernel_op_227*tmp_kernel_op_83 + tmp_kernel_op_230*tmp_kernel_op_73;
+                const real_t tmp_kernel_op_235 = tmp_kernel_op_114*(tmp_moved_constant_8 + tmp_moved_constant_9);
+                const real_t tmp_kernel_op_238 = tmp_kernel_op_114*(tmp_moved_constant_10 + tmp_moved_constant_11);
+                const real_t tmp_kernel_op_239 = tmp_kernel_op_109*tmp_kernel_op_235 + tmp_kernel_op_112*tmp_kernel_op_238;
+                const real_t tmp_kernel_op_240 = tmp_kernel_op_111*tmp_kernel_op_238 + tmp_kernel_op_121*tmp_kernel_op_235;
+                const real_t tmp_kernel_op_243 = tmp_kernel_op_152*(tmp_moved_constant_12 + tmp_moved_constant_13);
+                const real_t tmp_kernel_op_246 = tmp_kernel_op_152*(tmp_moved_constant_14 + tmp_moved_constant_15);
+                const real_t tmp_kernel_op_247 = tmp_kernel_op_147*tmp_kernel_op_243 + tmp_kernel_op_150*tmp_kernel_op_246;
+                const real_t tmp_kernel_op_248 = tmp_kernel_op_149*tmp_kernel_op_246 + tmp_kernel_op_159*tmp_kernel_op_243;
+                const real_t tmp_kernel_op_249 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_239 + tmp_kernel_op_122*tmp_kernel_op_240) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_247 + tmp_kernel_op_160*tmp_kernel_op_248) + tmp_kernel_op_60*(tmp_kernel_op_223*tmp_kernel_op_40 + tmp_kernel_op_224*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_231*tmp_kernel_op_82 + tmp_kernel_op_232*tmp_kernel_op_84);
+                const real_t tmp_kernel_op_251 = tmp_kernel_op_34*(jac_affine_inv_1_1_BLUE*tmp_kernel_op_250 - tmp_moved_constant_0);
+                const real_t tmp_kernel_op_252 = tmp_kernel_op_34*(jac_affine_inv_1_0_BLUE*tmp_kernel_op_250 - tmp_moved_constant_2);
+                const real_t tmp_kernel_op_253 = tmp_kernel_op_251*tmp_kernel_op_29 + tmp_kernel_op_252*tmp_kernel_op_32;
+                const real_t tmp_kernel_op_254 = tmp_kernel_op_251*tmp_kernel_op_41 + tmp_kernel_op_252*tmp_kernel_op_31;
+                const real_t tmp_kernel_op_256 = tmp_kernel_op_76*(jac_affine_inv_1_1_BLUE*tmp_kernel_op_255 - tmp_moved_constant_4);
+                const real_t tmp_kernel_op_257 = tmp_kernel_op_76*(jac_affine_inv_1_0_BLUE*tmp_kernel_op_255 - tmp_moved_constant_6);
+                const real_t tmp_kernel_op_258 = tmp_kernel_op_256*tmp_kernel_op_71 + tmp_kernel_op_257*tmp_kernel_op_74;
+                const real_t tmp_kernel_op_259 = tmp_kernel_op_256*tmp_kernel_op_83 + tmp_kernel_op_257*tmp_kernel_op_73;
+                const real_t tmp_kernel_op_261 = tmp_kernel_op_114*(jac_affine_inv_1_1_BLUE*tmp_kernel_op_260 - tmp_moved_constant_8);
+                const real_t tmp_kernel_op_262 = tmp_kernel_op_114*(jac_affine_inv_1_0_BLUE*tmp_kernel_op_260 - tmp_moved_constant_10);
+                const real_t tmp_kernel_op_263 = tmp_kernel_op_109*tmp_kernel_op_261 + tmp_kernel_op_112*tmp_kernel_op_262;
+                const real_t tmp_kernel_op_264 = tmp_kernel_op_111*tmp_kernel_op_262 + tmp_kernel_op_121*tmp_kernel_op_261;
+                const real_t tmp_kernel_op_266 = tmp_kernel_op_152*(jac_affine_inv_1_1_BLUE*tmp_kernel_op_265 - tmp_moved_constant_12);
+                const real_t tmp_kernel_op_267 = tmp_kernel_op_152*(jac_affine_inv_1_0_BLUE*tmp_kernel_op_265 - tmp_moved_constant_14);
+                const real_t tmp_kernel_op_268 = tmp_kernel_op_147*tmp_kernel_op_266 + tmp_kernel_op_150*tmp_kernel_op_267;
+                const real_t tmp_kernel_op_269 = tmp_kernel_op_149*tmp_kernel_op_267 + tmp_kernel_op_159*tmp_kernel_op_266;
+                const real_t tmp_kernel_op_270 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_263 + tmp_kernel_op_122*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_268 + tmp_kernel_op_160*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_253*tmp_kernel_op_40 + tmp_kernel_op_254*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_258*tmp_kernel_op_82 + tmp_kernel_op_259*tmp_kernel_op_84);
+                const real_t tmp_kernel_op_272 = tmp_kernel_op_34*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_271 - tmp_moved_constant_1);
+                const real_t tmp_kernel_op_273 = tmp_kernel_op_34*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_271 - tmp_moved_constant_3);
+                const real_t tmp_kernel_op_274 = tmp_kernel_op_272*tmp_kernel_op_29 + tmp_kernel_op_273*tmp_kernel_op_32;
+                const real_t tmp_kernel_op_275 = tmp_kernel_op_272*tmp_kernel_op_41 + tmp_kernel_op_273*tmp_kernel_op_31;
+                const real_t tmp_kernel_op_277 = tmp_kernel_op_76*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_276 - tmp_moved_constant_5);
+                const real_t tmp_kernel_op_278 = tmp_kernel_op_76*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_276 - tmp_moved_constant_7);
+                const real_t tmp_kernel_op_279 = tmp_kernel_op_277*tmp_kernel_op_71 + tmp_kernel_op_278*tmp_kernel_op_74;
+                const real_t tmp_kernel_op_280 = tmp_kernel_op_277*tmp_kernel_op_83 + tmp_kernel_op_278*tmp_kernel_op_73;
+                const real_t tmp_kernel_op_282 = tmp_kernel_op_114*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_281 - tmp_moved_constant_9);
+                const real_t tmp_kernel_op_283 = tmp_kernel_op_114*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_281 - tmp_moved_constant_11);
+                const real_t tmp_kernel_op_284 = tmp_kernel_op_109*tmp_kernel_op_282 + tmp_kernel_op_112*tmp_kernel_op_283;
+                const real_t tmp_kernel_op_285 = tmp_kernel_op_111*tmp_kernel_op_283 + tmp_kernel_op_121*tmp_kernel_op_282;
+                const real_t tmp_kernel_op_287 = tmp_kernel_op_152*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_286 - tmp_moved_constant_13);
+                const real_t tmp_kernel_op_288 = tmp_kernel_op_152*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_286 - tmp_moved_constant_15);
+                const real_t tmp_kernel_op_289 = tmp_kernel_op_147*tmp_kernel_op_287 + tmp_kernel_op_150*tmp_kernel_op_288;
+                const real_t tmp_kernel_op_290 = tmp_kernel_op_149*tmp_kernel_op_288 + tmp_kernel_op_159*tmp_kernel_op_287;
+                const real_t tmp_kernel_op_291 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_284 + tmp_kernel_op_122*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_289 + tmp_kernel_op_160*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_274*tmp_kernel_op_40 + tmp_kernel_op_275*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_279*tmp_kernel_op_82 + tmp_kernel_op_280*tmp_kernel_op_84);
+                const real_t tmp_kernel_op_292 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_209 + tmp_kernel_op_189*tmp_kernel_op_210) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_214 + tmp_kernel_op_194*tmp_kernel_op_215) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_199 + tmp_kernel_op_179*tmp_kernel_op_200) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_204 + tmp_kernel_op_184*tmp_kernel_op_205);
+                const real_t tmp_kernel_op_293 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_239 + tmp_kernel_op_189*tmp_kernel_op_240) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_247 + tmp_kernel_op_194*tmp_kernel_op_248) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_223 + tmp_kernel_op_179*tmp_kernel_op_224) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_231 + tmp_kernel_op_184*tmp_kernel_op_232);
+                const real_t tmp_kernel_op_294 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_263 + tmp_kernel_op_189*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_268 + tmp_kernel_op_194*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_253 + tmp_kernel_op_179*tmp_kernel_op_254) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_258 + tmp_kernel_op_184*tmp_kernel_op_259);
+                const real_t tmp_kernel_op_295 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_284 + tmp_kernel_op_189*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_289 + tmp_kernel_op_194*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_274 + tmp_kernel_op_179*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_279 + tmp_kernel_op_184*tmp_kernel_op_280);
+                const real_t tmp_kernel_op_296 = tmp_kernel_op_136*(tmp_kernel_op_209*tmp_kernel_op_239 + tmp_kernel_op_210*tmp_kernel_op_240) + tmp_kernel_op_174*(tmp_kernel_op_214*tmp_kernel_op_247 + tmp_kernel_op_215*tmp_kernel_op_248) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_223 + tmp_kernel_op_200*tmp_kernel_op_224) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_231 + tmp_kernel_op_205*tmp_kernel_op_232);
+                const real_t tmp_kernel_op_297 = tmp_kernel_op_136*(tmp_kernel_op_209*tmp_kernel_op_263 + tmp_kernel_op_210*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_214*tmp_kernel_op_268 + tmp_kernel_op_215*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_253 + tmp_kernel_op_200*tmp_kernel_op_254) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_258 + tmp_kernel_op_205*tmp_kernel_op_259);
+                const real_t tmp_kernel_op_298 = tmp_kernel_op_136*(tmp_kernel_op_209*tmp_kernel_op_284 + tmp_kernel_op_210*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_214*tmp_kernel_op_289 + tmp_kernel_op_215*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_274 + tmp_kernel_op_200*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_279 + tmp_kernel_op_205*tmp_kernel_op_280);
+                const real_t tmp_kernel_op_299 = tmp_kernel_op_136*(tmp_kernel_op_239*tmp_kernel_op_263 + tmp_kernel_op_240*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_247*tmp_kernel_op_268 + tmp_kernel_op_248*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_223*tmp_kernel_op_253 + tmp_kernel_op_224*tmp_kernel_op_254) + tmp_kernel_op_98*(tmp_kernel_op_231*tmp_kernel_op_258 + tmp_kernel_op_232*tmp_kernel_op_259);
+                const real_t tmp_kernel_op_300 = tmp_kernel_op_136*(tmp_kernel_op_239*tmp_kernel_op_284 + tmp_kernel_op_240*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_247*tmp_kernel_op_289 + tmp_kernel_op_248*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_223*tmp_kernel_op_274 + tmp_kernel_op_224*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_231*tmp_kernel_op_279 + tmp_kernel_op_232*tmp_kernel_op_280);
+                const real_t tmp_kernel_op_301 = tmp_kernel_op_136*(tmp_kernel_op_263*tmp_kernel_op_284 + tmp_kernel_op_264*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_268*tmp_kernel_op_289 + tmp_kernel_op_269*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_253*tmp_kernel_op_274 + tmp_kernel_op_254*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_258*tmp_kernel_op_279 + tmp_kernel_op_259*tmp_kernel_op_280);
+                const real_t elMatVec_0 = src_dof_0*(tmp_kernel_op_136*((tmp_kernel_op_120*tmp_kernel_op_120) + (tmp_kernel_op_122*tmp_kernel_op_122)) + tmp_kernel_op_174*((tmp_kernel_op_158*tmp_kernel_op_158) + (tmp_kernel_op_160*tmp_kernel_op_160)) + tmp_kernel_op_60*((tmp_kernel_op_40*tmp_kernel_op_40) + (tmp_kernel_op_42*tmp_kernel_op_42)) + tmp_kernel_op_98*((tmp_kernel_op_82*tmp_kernel_op_82) + (tmp_kernel_op_84*tmp_kernel_op_84))) + src_dof_1*tmp_kernel_op_195 + src_dof_2*tmp_kernel_op_216 + src_dof_3*tmp_kernel_op_249 + src_dof_4*tmp_kernel_op_270 + src_dof_5*tmp_kernel_op_291;
+                const real_t elMatVec_1 = src_dof_0*tmp_kernel_op_195 + src_dof_1*(tmp_kernel_op_136*((tmp_kernel_op_188*tmp_kernel_op_188) + (tmp_kernel_op_189*tmp_kernel_op_189)) + tmp_kernel_op_174*((tmp_kernel_op_193*tmp_kernel_op_193) + (tmp_kernel_op_194*tmp_kernel_op_194)) + tmp_kernel_op_60*((tmp_kernel_op_178*tmp_kernel_op_178) + (tmp_kernel_op_179*tmp_kernel_op_179)) + tmp_kernel_op_98*((tmp_kernel_op_183*tmp_kernel_op_183) + (tmp_kernel_op_184*tmp_kernel_op_184))) + src_dof_2*tmp_kernel_op_292 + src_dof_3*tmp_kernel_op_293 + src_dof_4*tmp_kernel_op_294 + src_dof_5*tmp_kernel_op_295;
+                const real_t elMatVec_2 = src_dof_0*tmp_kernel_op_216 + src_dof_1*tmp_kernel_op_292 + src_dof_2*(tmp_kernel_op_136*((tmp_kernel_op_209*tmp_kernel_op_209) + (tmp_kernel_op_210*tmp_kernel_op_210)) + tmp_kernel_op_174*((tmp_kernel_op_214*tmp_kernel_op_214) + (tmp_kernel_op_215*tmp_kernel_op_215)) + tmp_kernel_op_60*((tmp_kernel_op_199*tmp_kernel_op_199) + (tmp_kernel_op_200*tmp_kernel_op_200)) + tmp_kernel_op_98*((tmp_kernel_op_204*tmp_kernel_op_204) + (tmp_kernel_op_205*tmp_kernel_op_205))) + src_dof_3*tmp_kernel_op_296 + src_dof_4*tmp_kernel_op_297 + src_dof_5*tmp_kernel_op_298;
+                const real_t elMatVec_3 = src_dof_0*tmp_kernel_op_249 + src_dof_1*tmp_kernel_op_293 + src_dof_2*tmp_kernel_op_296 + src_dof_3*(tmp_kernel_op_136*((tmp_kernel_op_239*tmp_kernel_op_239) + (tmp_kernel_op_240*tmp_kernel_op_240)) + tmp_kernel_op_174*((tmp_kernel_op_247*tmp_kernel_op_247) + (tmp_kernel_op_248*tmp_kernel_op_248)) + tmp_kernel_op_60*((tmp_kernel_op_223*tmp_kernel_op_223) + (tmp_kernel_op_224*tmp_kernel_op_224)) + tmp_kernel_op_98*((tmp_kernel_op_231*tmp_kernel_op_231) + (tmp_kernel_op_232*tmp_kernel_op_232))) + src_dof_4*tmp_kernel_op_299 + src_dof_5*tmp_kernel_op_300;
+                const real_t elMatVec_4 = src_dof_0*tmp_kernel_op_270 + src_dof_1*tmp_kernel_op_294 + src_dof_2*tmp_kernel_op_297 + src_dof_3*tmp_kernel_op_299 + src_dof_4*(tmp_kernel_op_136*((tmp_kernel_op_263*tmp_kernel_op_263) + (tmp_kernel_op_264*tmp_kernel_op_264)) + tmp_kernel_op_174*((tmp_kernel_op_268*tmp_kernel_op_268) + (tmp_kernel_op_269*tmp_kernel_op_269)) + tmp_kernel_op_60*((tmp_kernel_op_253*tmp_kernel_op_253) + (tmp_kernel_op_254*tmp_kernel_op_254)) + tmp_kernel_op_98*((tmp_kernel_op_258*tmp_kernel_op_258) + (tmp_kernel_op_259*tmp_kernel_op_259))) + src_dof_5*tmp_kernel_op_301;
+                const real_t elMatVec_5 = src_dof_0*tmp_kernel_op_291 + src_dof_1*tmp_kernel_op_295 + src_dof_2*tmp_kernel_op_298 + src_dof_3*tmp_kernel_op_300 + src_dof_4*tmp_kernel_op_301 + src_dof_5*(tmp_kernel_op_136*((tmp_kernel_op_284*tmp_kernel_op_284) + (tmp_kernel_op_285*tmp_kernel_op_285)) + tmp_kernel_op_174*((tmp_kernel_op_289*tmp_kernel_op_289) + (tmp_kernel_op_290*tmp_kernel_op_290)) + tmp_kernel_op_60*((tmp_kernel_op_274*tmp_kernel_op_274) + (tmp_kernel_op_275*tmp_kernel_op_275)) + tmp_kernel_op_98*((tmp_kernel_op_279*tmp_kernel_op_279) + (tmp_kernel_op_280*tmp_kernel_op_280)));
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..58716647909d0cae09330b730ca26729a618f8e7
--- /dev/null
+++ b/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
@@ -0,0 +1,1081 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseDiffusionAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseDiffusionAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_kernel_op_0 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_kernel_op_1 = -tmp_kernel_op_0;
+       const real_t tmp_kernel_op_15 = rayVertex_0 - refVertex_0;
+       const real_t tmp_kernel_op_16 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_kernel_op_17 = -tmp_kernel_op_16;
+       const real_t tmp_kernel_op_18 = rayVertex_1 - refVertex_1;
+       const real_t tmp_kernel_op_19 = radRayVertex - radRefVertex;
+       const real_t tmp_kernel_op_20 = -tmp_kernel_op_19*1.0 / (-tmp_kernel_op_1*tmp_kernel_op_15 + tmp_kernel_op_17*tmp_kernel_op_18);
+       const real_t tmp_kernel_op_21 = tmp_kernel_op_20*1.0;
+       const real_t tmp_kernel_op_25 = -rayVertex_1;
+       const real_t tmp_kernel_op_26 = -rayVertex_0;
+       const real_t tmp_kernel_op_35 = 1.3333333333333333;
+       const real_t tmp_kernel_op_36 = 1.3333333333333333;
+       const real_t tmp_kernel_op_37 = tmp_kernel_op_35 + tmp_kernel_op_36 - 3.0;
+       const real_t tmp_kernel_op_48 = tmp_kernel_op_19*1.0 / (tmp_kernel_op_0*tmp_kernel_op_15 - tmp_kernel_op_16*tmp_kernel_op_18);
+       const real_t tmp_kernel_op_49 = tmp_kernel_op_48*1.0;
+       const real_t tmp_kernel_op_75 = 0.80000000000000004;
+       const real_t tmp_kernel_op_76 = 2.3999999999999999;
+       const real_t tmp_kernel_op_77 = tmp_kernel_op_75 + tmp_kernel_op_76 - 3.0;
+       const real_t tmp_kernel_op_111 = 2.3999999999999999;
+       const real_t tmp_kernel_op_112 = 0.80000000000000004;
+       const real_t tmp_kernel_op_113 = tmp_kernel_op_111 + tmp_kernel_op_112 - 3.0;
+       const real_t tmp_kernel_op_147 = 0.80000000000000004;
+       const real_t tmp_kernel_op_148 = 0.80000000000000004;
+       const real_t tmp_kernel_op_149 = tmp_kernel_op_147 + tmp_kernel_op_148 - 3.0;
+       const real_t tmp_kernel_op_191 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_36;
+       const real_t tmp_kernel_op_192 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_35;
+       const real_t tmp_kernel_op_194 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_36;
+       const real_t tmp_kernel_op_195 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_35;
+       const real_t tmp_kernel_op_197 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_76;
+       const real_t tmp_kernel_op_198 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_75;
+       const real_t tmp_kernel_op_200 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_76;
+       const real_t tmp_kernel_op_201 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_75;
+       const real_t tmp_kernel_op_203 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_112;
+       const real_t tmp_kernel_op_204 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_111;
+       const real_t tmp_kernel_op_206 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_112;
+       const real_t tmp_kernel_op_207 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_111;
+       const real_t tmp_kernel_op_209 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_148;
+       const real_t tmp_kernel_op_210 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_147;
+       const real_t tmp_kernel_op_212 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_148;
+       const real_t tmp_kernel_op_213 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_147;
+       const real_t tmp_kernel_op_215 = -tmp_kernel_op_35 + 1.3333333333333335;
+       const real_t tmp_kernel_op_218 = -tmp_kernel_op_75 - 0.79999999999999982;
+       const real_t tmp_kernel_op_221 = -tmp_kernel_op_111 + 2.3999999999999999;
+       const real_t tmp_kernel_op_224 = -tmp_kernel_op_147 + 2.3999999999999999;
+       const real_t tmp_kernel_op_227 = -tmp_kernel_op_36 + 1.3333333333333335;
+       const real_t tmp_kernel_op_230 = -tmp_kernel_op_76 + 2.3999999999999999;
+       const real_t tmp_kernel_op_233 = -tmp_kernel_op_112 - 0.79999999999999982;
+       const real_t tmp_kernel_op_236 = -tmp_kernel_op_148 + 2.3999999999999999;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d tmp_kernel_op_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_3 = _mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_4 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_5 = _mm256_mul_pd(tmp_kernel_op_4,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_6 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331)),_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331))),p_affine_0_0);
+                const __m256d tmp_kernel_op_7 = _mm256_mul_pd(tmp_kernel_op_6,tmp_kernel_op_6);
+                const __m256d tmp_kernel_op_8 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_9 = _mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_10 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_11 = _mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_12 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331)),_mm256_mul_pd(tmp_kernel_op_9,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331))),p_affine_0_1);
+                const __m256d tmp_kernel_op_13 = _mm256_mul_pd(tmp_kernel_op_12,tmp_kernel_op_12);
+                const __m256d tmp_kernel_op_14 = _mm256_add_pd(tmp_kernel_op_13,tmp_kernel_op_7);
+                const __m256d tmp_kernel_op_22 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_14)),_mm256_set_pd(tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21));
+                const __m256d tmp_kernel_op_23 = _mm256_mul_pd(tmp_kernel_op_22,tmp_kernel_op_6);
+                const __m256d tmp_kernel_op_24 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_14),_mm256_mul_pd(tmp_kernel_op_14,tmp_kernel_op_14));
+                const __m256d tmp_kernel_op_27 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26),tmp_kernel_op_6),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25),tmp_kernel_op_12),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17))),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_28 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_24,tmp_kernel_op_27),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_29 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_23,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(tmp_kernel_op_13,tmp_kernel_op_28));
+                const __m256d tmp_kernel_op_30 = _mm256_mul_pd(tmp_kernel_op_12,tmp_kernel_op_22);
+                const __m256d tmp_kernel_op_31 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_30,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_24,tmp_kernel_op_27),tmp_kernel_op_7),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_32 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_23,_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_12,tmp_kernel_op_24),tmp_kernel_op_27),tmp_kernel_op_6),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_33 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_30,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_12,tmp_kernel_op_28),tmp_kernel_op_6),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_34 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_29,tmp_kernel_op_31),_mm256_mul_pd(tmp_kernel_op_32,tmp_kernel_op_33)));
+                const __m256d tmp_kernel_op_38 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY),_mm256_set_pd(tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY),_mm256_set_pd(tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37))));
+                const __m256d tmp_kernel_op_39 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY),_mm256_set_pd(tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY),_mm256_set_pd(tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37))));
+                const __m256d tmp_kernel_op_40 = _mm256_mul_pd(tmp_kernel_op_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_41 = _mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_42 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331)),_mm256_mul_pd(tmp_kernel_op_4,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331))),tmp_kernel_op_41);
+                const __m256d tmp_kernel_op_43 = _mm256_mul_pd(tmp_kernel_op_42,tmp_kernel_op_42);
+                const __m256d tmp_kernel_op_44 = _mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_45 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331))),tmp_kernel_op_44);
+                const __m256d tmp_kernel_op_46 = _mm256_mul_pd(tmp_kernel_op_45,tmp_kernel_op_45);
+                const __m256d tmp_kernel_op_47 = _mm256_add_pd(tmp_kernel_op_43,tmp_kernel_op_46);
+                const __m256d tmp_kernel_op_50 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_47)),_mm256_set_pd(tmp_kernel_op_49,tmp_kernel_op_49,tmp_kernel_op_49,tmp_kernel_op_49));
+                const __m256d tmp_kernel_op_51 = _mm256_mul_pd(tmp_kernel_op_42,tmp_kernel_op_50);
+                const __m256d tmp_kernel_op_52 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_45),_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_42),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_53 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_47),_mm256_mul_pd(tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_54 = _mm256_mul_pd(tmp_kernel_op_53,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_52,_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_55 = _mm256_mul_pd(tmp_kernel_op_45,tmp_kernel_op_50);
+                const __m256d tmp_kernel_op_56 = _mm256_mul_pd(tmp_kernel_op_53,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_52,_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_57 = _mm256_mul_pd(tmp_kernel_op_42,tmp_kernel_op_45);
+                const __m256d tmp_kernel_op_58 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(-0.28125,-0.28125,-0.28125,-0.28125),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_55,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(tmp_kernel_op_43,tmp_kernel_op_56)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_51,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_46,tmp_kernel_op_54),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_51,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_56,tmp_kernel_op_57),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_55,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_54,tmp_kernel_op_57))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_59 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),p_affine_0_0);
+                const __m256d tmp_kernel_op_60 = _mm256_mul_pd(tmp_kernel_op_59,tmp_kernel_op_59);
+                const __m256d tmp_kernel_op_61 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_9,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),p_affine_0_1);
+                const __m256d tmp_kernel_op_62 = _mm256_mul_pd(tmp_kernel_op_61,tmp_kernel_op_61);
+                const __m256d tmp_kernel_op_63 = _mm256_add_pd(tmp_kernel_op_60,tmp_kernel_op_62);
+                const __m256d tmp_kernel_op_64 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_63)),_mm256_set_pd(tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21));
+                const __m256d tmp_kernel_op_65 = _mm256_mul_pd(tmp_kernel_op_59,tmp_kernel_op_64);
+                const __m256d tmp_kernel_op_66 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_63),_mm256_mul_pd(tmp_kernel_op_63,tmp_kernel_op_63));
+                const __m256d tmp_kernel_op_67 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26),tmp_kernel_op_59),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25),tmp_kernel_op_61),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17))),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_68 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_66,tmp_kernel_op_67),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_69 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_65,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(tmp_kernel_op_62,tmp_kernel_op_68));
+                const __m256d tmp_kernel_op_70 = _mm256_mul_pd(tmp_kernel_op_61,tmp_kernel_op_64);
+                const __m256d tmp_kernel_op_71 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_70,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_60,tmp_kernel_op_66),tmp_kernel_op_67),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_72 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_65,_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_59,tmp_kernel_op_61),tmp_kernel_op_66),tmp_kernel_op_67),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_73 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_70,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_59,tmp_kernel_op_61),tmp_kernel_op_68),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_74 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_69,tmp_kernel_op_71),_mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_73)));
+                const __m256d tmp_kernel_op_78 = _mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY),_mm256_set_pd(tmp_kernel_op_77,tmp_kernel_op_77,tmp_kernel_op_77,tmp_kernel_op_77)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY),_mm256_set_pd(tmp_kernel_op_77,tmp_kernel_op_77,tmp_kernel_op_77,tmp_kernel_op_77))));
+                const __m256d tmp_kernel_op_79 = _mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY),_mm256_set_pd(tmp_kernel_op_77,tmp_kernel_op_77,tmp_kernel_op_77,tmp_kernel_op_77)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY),_mm256_set_pd(tmp_kernel_op_77,tmp_kernel_op_77,tmp_kernel_op_77,tmp_kernel_op_77))));
+                const __m256d tmp_kernel_op_80 = _mm256_mul_pd(tmp_kernel_op_73,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_81 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_4,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),tmp_kernel_op_41);
+                const __m256d tmp_kernel_op_82 = _mm256_mul_pd(tmp_kernel_op_81,tmp_kernel_op_81);
+                const __m256d tmp_kernel_op_83 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),tmp_kernel_op_44);
+                const __m256d tmp_kernel_op_84 = _mm256_mul_pd(tmp_kernel_op_83,tmp_kernel_op_83);
+                const __m256d tmp_kernel_op_85 = _mm256_add_pd(tmp_kernel_op_82,tmp_kernel_op_84);
+                const __m256d tmp_kernel_op_86 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_85)),_mm256_set_pd(tmp_kernel_op_49,tmp_kernel_op_49,tmp_kernel_op_49,tmp_kernel_op_49));
+                const __m256d tmp_kernel_op_87 = _mm256_mul_pd(tmp_kernel_op_81,tmp_kernel_op_86);
+                const __m256d tmp_kernel_op_88 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_83),_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_81),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_89 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_85),_mm256_mul_pd(tmp_kernel_op_85,tmp_kernel_op_85)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_90 = _mm256_mul_pd(tmp_kernel_op_89,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_88,_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_91 = _mm256_mul_pd(tmp_kernel_op_83,tmp_kernel_op_86);
+                const __m256d tmp_kernel_op_92 = _mm256_mul_pd(tmp_kernel_op_89,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_88,_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_93 = _mm256_mul_pd(tmp_kernel_op_81,tmp_kernel_op_83);
+                const __m256d tmp_kernel_op_94 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.26041666666666669,0.26041666666666669,0.26041666666666669,0.26041666666666669),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_91,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(tmp_kernel_op_82,tmp_kernel_op_92)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_87,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_84,tmp_kernel_op_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_87,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_92,tmp_kernel_op_93),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_91,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_90,tmp_kernel_op_93))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_95 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),p_affine_0_0);
+                const __m256d tmp_kernel_op_96 = _mm256_mul_pd(tmp_kernel_op_95,tmp_kernel_op_95);
+                const __m256d tmp_kernel_op_97 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_9,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),p_affine_0_1);
+                const __m256d tmp_kernel_op_98 = _mm256_mul_pd(tmp_kernel_op_97,tmp_kernel_op_97);
+                const __m256d tmp_kernel_op_99 = _mm256_add_pd(tmp_kernel_op_96,tmp_kernel_op_98);
+                const __m256d tmp_kernel_op_100 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_99)),_mm256_set_pd(tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21));
+                const __m256d tmp_kernel_op_101 = _mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_95);
+                const __m256d tmp_kernel_op_102 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_99),_mm256_mul_pd(tmp_kernel_op_99,tmp_kernel_op_99));
+                const __m256d tmp_kernel_op_103 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26),tmp_kernel_op_95),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25),tmp_kernel_op_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17))),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_104 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_102,tmp_kernel_op_103),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_105 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_101,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(tmp_kernel_op_104,tmp_kernel_op_98));
+                const __m256d tmp_kernel_op_106 = _mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_97);
+                const __m256d tmp_kernel_op_107 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_106,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_102,tmp_kernel_op_103),tmp_kernel_op_96),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_108 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_101,_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_102,tmp_kernel_op_103),tmp_kernel_op_95),tmp_kernel_op_97),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_109 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_106,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_104,tmp_kernel_op_95),tmp_kernel_op_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_110 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_107),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_109)));
+                const __m256d tmp_kernel_op_114 = _mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY),_mm256_set_pd(tmp_kernel_op_113,tmp_kernel_op_113,tmp_kernel_op_113,tmp_kernel_op_113)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY),_mm256_set_pd(tmp_kernel_op_113,tmp_kernel_op_113,tmp_kernel_op_113,tmp_kernel_op_113))));
+                const __m256d tmp_kernel_op_115 = _mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY),_mm256_set_pd(tmp_kernel_op_113,tmp_kernel_op_113,tmp_kernel_op_113,tmp_kernel_op_113)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY),_mm256_set_pd(tmp_kernel_op_113,tmp_kernel_op_113,tmp_kernel_op_113,tmp_kernel_op_113))));
+                const __m256d tmp_kernel_op_116 = _mm256_mul_pd(tmp_kernel_op_109,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_117 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_4,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),tmp_kernel_op_41);
+                const __m256d tmp_kernel_op_118 = _mm256_mul_pd(tmp_kernel_op_117,tmp_kernel_op_117);
+                const __m256d tmp_kernel_op_119 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),tmp_kernel_op_44);
+                const __m256d tmp_kernel_op_120 = _mm256_mul_pd(tmp_kernel_op_119,tmp_kernel_op_119);
+                const __m256d tmp_kernel_op_121 = _mm256_add_pd(tmp_kernel_op_118,tmp_kernel_op_120);
+                const __m256d tmp_kernel_op_122 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_121)),_mm256_set_pd(tmp_kernel_op_49,tmp_kernel_op_49,tmp_kernel_op_49,tmp_kernel_op_49));
+                const __m256d tmp_kernel_op_123 = _mm256_mul_pd(tmp_kernel_op_117,tmp_kernel_op_122);
+                const __m256d tmp_kernel_op_124 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_119),_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_117),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_125 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_121),_mm256_mul_pd(tmp_kernel_op_121,tmp_kernel_op_121)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_126 = _mm256_mul_pd(tmp_kernel_op_125,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_124,_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_127 = _mm256_mul_pd(tmp_kernel_op_119,tmp_kernel_op_122);
+                const __m256d tmp_kernel_op_128 = _mm256_mul_pd(tmp_kernel_op_125,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_124,_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_129 = _mm256_mul_pd(tmp_kernel_op_117,tmp_kernel_op_119);
+                const __m256d tmp_kernel_op_130 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.26041666666666669,0.26041666666666669,0.26041666666666669,0.26041666666666669),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_127,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(tmp_kernel_op_118,tmp_kernel_op_128)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_123,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_120,tmp_kernel_op_126),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_123,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_128,tmp_kernel_op_129),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_127,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_126,tmp_kernel_op_129))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_131 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001))),p_affine_0_0);
+                const __m256d tmp_kernel_op_132 = _mm256_mul_pd(tmp_kernel_op_131,tmp_kernel_op_131);
+                const __m256d tmp_kernel_op_133 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_9,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001))),p_affine_0_1);
+                const __m256d tmp_kernel_op_134 = _mm256_mul_pd(tmp_kernel_op_133,tmp_kernel_op_133);
+                const __m256d tmp_kernel_op_135 = _mm256_add_pd(tmp_kernel_op_132,tmp_kernel_op_134);
+                const __m256d tmp_kernel_op_136 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_135)),_mm256_set_pd(tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21));
+                const __m256d tmp_kernel_op_137 = _mm256_mul_pd(tmp_kernel_op_131,tmp_kernel_op_136);
+                const __m256d tmp_kernel_op_138 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_135),_mm256_mul_pd(tmp_kernel_op_135,tmp_kernel_op_135));
+                const __m256d tmp_kernel_op_139 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26),tmp_kernel_op_131),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25),tmp_kernel_op_133),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17))),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_140 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_138,tmp_kernel_op_139),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_141 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_137,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(tmp_kernel_op_134,tmp_kernel_op_140));
+                const __m256d tmp_kernel_op_142 = _mm256_mul_pd(tmp_kernel_op_133,tmp_kernel_op_136);
+                const __m256d tmp_kernel_op_143 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_142,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_132,tmp_kernel_op_138),tmp_kernel_op_139),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_144 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_137,_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_131,tmp_kernel_op_133),tmp_kernel_op_138),tmp_kernel_op_139),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_145 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_142,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_131,tmp_kernel_op_133),tmp_kernel_op_140),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_146 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_143),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_145)));
+                const __m256d tmp_kernel_op_150 = _mm256_mul_pd(tmp_kernel_op_146,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY),_mm256_set_pd(tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY),_mm256_set_pd(tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149))));
+                const __m256d tmp_kernel_op_151 = _mm256_mul_pd(tmp_kernel_op_146,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY),_mm256_set_pd(tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY),_mm256_set_pd(tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149))));
+                const __m256d tmp_kernel_op_152 = _mm256_mul_pd(tmp_kernel_op_145,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_153 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_4,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001))),tmp_kernel_op_41);
+                const __m256d tmp_kernel_op_154 = _mm256_mul_pd(tmp_kernel_op_153,tmp_kernel_op_153);
+                const __m256d tmp_kernel_op_155 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001))),tmp_kernel_op_44);
+                const __m256d tmp_kernel_op_156 = _mm256_mul_pd(tmp_kernel_op_155,tmp_kernel_op_155);
+                const __m256d tmp_kernel_op_157 = _mm256_add_pd(tmp_kernel_op_154,tmp_kernel_op_156);
+                const __m256d tmp_kernel_op_158 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_157)),_mm256_set_pd(tmp_kernel_op_49,tmp_kernel_op_49,tmp_kernel_op_49,tmp_kernel_op_49));
+                const __m256d tmp_kernel_op_159 = _mm256_mul_pd(tmp_kernel_op_153,tmp_kernel_op_158);
+                const __m256d tmp_kernel_op_160 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_155),_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_153),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_161 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_157),_mm256_mul_pd(tmp_kernel_op_157,tmp_kernel_op_157)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_162 = _mm256_mul_pd(tmp_kernel_op_161,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_160,_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_163 = _mm256_mul_pd(tmp_kernel_op_155,tmp_kernel_op_158);
+                const __m256d tmp_kernel_op_164 = _mm256_mul_pd(tmp_kernel_op_161,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_160,_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_165 = _mm256_mul_pd(tmp_kernel_op_153,tmp_kernel_op_155);
+                const __m256d tmp_kernel_op_166 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.26041666666666669,0.26041666666666669,0.26041666666666669,0.26041666666666669),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_163,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(tmp_kernel_op_154,tmp_kernel_op_164)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_159,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_156,tmp_kernel_op_162),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_159,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_164,tmp_kernel_op_165),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_163,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_162,tmp_kernel_op_165))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_167 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_35,tmp_kernel_op_35,tmp_kernel_op_35,tmp_kernel_op_35)));
+                const __m256d tmp_kernel_op_168 = _mm256_mul_pd(tmp_kernel_op_167,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                const __m256d tmp_kernel_op_169 = _mm256_mul_pd(tmp_kernel_op_167,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                const __m256d tmp_kernel_op_170 = _mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_75,tmp_kernel_op_75,tmp_kernel_op_75,tmp_kernel_op_75)));
+                const __m256d tmp_kernel_op_171 = _mm256_mul_pd(tmp_kernel_op_170,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                const __m256d tmp_kernel_op_172 = _mm256_mul_pd(tmp_kernel_op_170,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                const __m256d tmp_kernel_op_173 = _mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_111,tmp_kernel_op_111,tmp_kernel_op_111,tmp_kernel_op_111)));
+                const __m256d tmp_kernel_op_174 = _mm256_mul_pd(tmp_kernel_op_173,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                const __m256d tmp_kernel_op_175 = _mm256_mul_pd(tmp_kernel_op_173,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                const __m256d tmp_kernel_op_176 = _mm256_mul_pd(tmp_kernel_op_146,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_147,tmp_kernel_op_147,tmp_kernel_op_147,tmp_kernel_op_147)));
+                const __m256d tmp_kernel_op_177 = _mm256_mul_pd(tmp_kernel_op_176,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY));
+                const __m256d tmp_kernel_op_178 = _mm256_mul_pd(tmp_kernel_op_176,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY));
+                const __m256d tmp_kernel_op_179 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_36,tmp_kernel_op_36,tmp_kernel_op_36,tmp_kernel_op_36)));
+                const __m256d tmp_kernel_op_180 = _mm256_mul_pd(tmp_kernel_op_179,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                const __m256d tmp_kernel_op_181 = _mm256_mul_pd(tmp_kernel_op_179,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                const __m256d tmp_kernel_op_182 = _mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_76,tmp_kernel_op_76,tmp_kernel_op_76,tmp_kernel_op_76)));
+                const __m256d tmp_kernel_op_183 = _mm256_mul_pd(tmp_kernel_op_182,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                const __m256d tmp_kernel_op_184 = _mm256_mul_pd(tmp_kernel_op_182,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                const __m256d tmp_kernel_op_185 = _mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_112,tmp_kernel_op_112,tmp_kernel_op_112,tmp_kernel_op_112)));
+                const __m256d tmp_kernel_op_186 = _mm256_mul_pd(tmp_kernel_op_185,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                const __m256d tmp_kernel_op_187 = _mm256_mul_pd(tmp_kernel_op_185,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                const __m256d tmp_kernel_op_188 = _mm256_mul_pd(tmp_kernel_op_146,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_148,tmp_kernel_op_148,tmp_kernel_op_148,tmp_kernel_op_148)));
+                const __m256d tmp_kernel_op_189 = _mm256_mul_pd(tmp_kernel_op_188,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY));
+                const __m256d tmp_kernel_op_190 = _mm256_mul_pd(tmp_kernel_op_188,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY));
+                const __m256d tmp_kernel_op_193 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_191,tmp_kernel_op_191,tmp_kernel_op_191,tmp_kernel_op_191),_mm256_set_pd(tmp_kernel_op_192,tmp_kernel_op_192,tmp_kernel_op_192,tmp_kernel_op_192)));
+                const __m256d tmp_kernel_op_196 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_194,tmp_kernel_op_194,tmp_kernel_op_194,tmp_kernel_op_194),_mm256_set_pd(tmp_kernel_op_195,tmp_kernel_op_195,tmp_kernel_op_195,tmp_kernel_op_195)));
+                const __m256d tmp_kernel_op_199 = _mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_197,tmp_kernel_op_197,tmp_kernel_op_197,tmp_kernel_op_197),_mm256_set_pd(tmp_kernel_op_198,tmp_kernel_op_198,tmp_kernel_op_198,tmp_kernel_op_198)));
+                const __m256d tmp_kernel_op_202 = _mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_200,tmp_kernel_op_200,tmp_kernel_op_200,tmp_kernel_op_200),_mm256_set_pd(tmp_kernel_op_201,tmp_kernel_op_201,tmp_kernel_op_201,tmp_kernel_op_201)));
+                const __m256d tmp_kernel_op_205 = _mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_203,tmp_kernel_op_203,tmp_kernel_op_203,tmp_kernel_op_203),_mm256_set_pd(tmp_kernel_op_204,tmp_kernel_op_204,tmp_kernel_op_204,tmp_kernel_op_204)));
+                const __m256d tmp_kernel_op_208 = _mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_206,tmp_kernel_op_206,tmp_kernel_op_206,tmp_kernel_op_206),_mm256_set_pd(tmp_kernel_op_207,tmp_kernel_op_207,tmp_kernel_op_207,tmp_kernel_op_207)));
+                const __m256d tmp_kernel_op_211 = _mm256_mul_pd(tmp_kernel_op_146,_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_209,tmp_kernel_op_209,tmp_kernel_op_209,tmp_kernel_op_209),_mm256_set_pd(tmp_kernel_op_210,tmp_kernel_op_210,tmp_kernel_op_210,tmp_kernel_op_210)));
+                const __m256d tmp_kernel_op_214 = _mm256_mul_pd(tmp_kernel_op_146,_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_212,tmp_kernel_op_212,tmp_kernel_op_212,tmp_kernel_op_212),_mm256_set_pd(tmp_kernel_op_213,tmp_kernel_op_213,tmp_kernel_op_213,tmp_kernel_op_213)));
+                const __m256d tmp_kernel_op_216 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_191,tmp_kernel_op_191,tmp_kernel_op_191,tmp_kernel_op_191)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY),_mm256_set_pd(tmp_kernel_op_215,tmp_kernel_op_215,tmp_kernel_op_215,tmp_kernel_op_215))));
+                const __m256d tmp_kernel_op_217 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_194,tmp_kernel_op_194,tmp_kernel_op_194,tmp_kernel_op_194)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY),_mm256_set_pd(tmp_kernel_op_215,tmp_kernel_op_215,tmp_kernel_op_215,tmp_kernel_op_215))));
+                const __m256d tmp_kernel_op_219 = _mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_197,tmp_kernel_op_197,tmp_kernel_op_197,tmp_kernel_op_197)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY),_mm256_set_pd(tmp_kernel_op_218,tmp_kernel_op_218,tmp_kernel_op_218,tmp_kernel_op_218))));
+                const __m256d tmp_kernel_op_220 = _mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_200,tmp_kernel_op_200,tmp_kernel_op_200,tmp_kernel_op_200)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY),_mm256_set_pd(tmp_kernel_op_218,tmp_kernel_op_218,tmp_kernel_op_218,tmp_kernel_op_218))));
+                const __m256d tmp_kernel_op_222 = _mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_203,tmp_kernel_op_203,tmp_kernel_op_203,tmp_kernel_op_203)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY),_mm256_set_pd(tmp_kernel_op_221,tmp_kernel_op_221,tmp_kernel_op_221,tmp_kernel_op_221))));
+                const __m256d tmp_kernel_op_223 = _mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_206,tmp_kernel_op_206,tmp_kernel_op_206,tmp_kernel_op_206)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY),_mm256_set_pd(tmp_kernel_op_221,tmp_kernel_op_221,tmp_kernel_op_221,tmp_kernel_op_221))));
+                const __m256d tmp_kernel_op_225 = _mm256_mul_pd(tmp_kernel_op_146,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_209,tmp_kernel_op_209,tmp_kernel_op_209,tmp_kernel_op_209)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY),_mm256_set_pd(tmp_kernel_op_224,tmp_kernel_op_224,tmp_kernel_op_224,tmp_kernel_op_224))));
+                const __m256d tmp_kernel_op_226 = _mm256_mul_pd(tmp_kernel_op_146,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_212,tmp_kernel_op_212,tmp_kernel_op_212,tmp_kernel_op_212)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY),_mm256_set_pd(tmp_kernel_op_224,tmp_kernel_op_224,tmp_kernel_op_224,tmp_kernel_op_224))));
+                const __m256d tmp_kernel_op_228 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_192,tmp_kernel_op_192,tmp_kernel_op_192,tmp_kernel_op_192)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY),_mm256_set_pd(tmp_kernel_op_227,tmp_kernel_op_227,tmp_kernel_op_227,tmp_kernel_op_227))));
+                const __m256d tmp_kernel_op_229 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_195,tmp_kernel_op_195,tmp_kernel_op_195,tmp_kernel_op_195)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY),_mm256_set_pd(tmp_kernel_op_227,tmp_kernel_op_227,tmp_kernel_op_227,tmp_kernel_op_227))));
+                const __m256d tmp_kernel_op_231 = _mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_198,tmp_kernel_op_198,tmp_kernel_op_198,tmp_kernel_op_198)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY),_mm256_set_pd(tmp_kernel_op_230,tmp_kernel_op_230,tmp_kernel_op_230,tmp_kernel_op_230))));
+                const __m256d tmp_kernel_op_232 = _mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_201,tmp_kernel_op_201,tmp_kernel_op_201,tmp_kernel_op_201)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY),_mm256_set_pd(tmp_kernel_op_230,tmp_kernel_op_230,tmp_kernel_op_230,tmp_kernel_op_230))));
+                const __m256d tmp_kernel_op_234 = _mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_204,tmp_kernel_op_204,tmp_kernel_op_204,tmp_kernel_op_204)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY),_mm256_set_pd(tmp_kernel_op_233,tmp_kernel_op_233,tmp_kernel_op_233,tmp_kernel_op_233))));
+                const __m256d tmp_kernel_op_235 = _mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_207,tmp_kernel_op_207,tmp_kernel_op_207,tmp_kernel_op_207)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY),_mm256_set_pd(tmp_kernel_op_233,tmp_kernel_op_233,tmp_kernel_op_233,tmp_kernel_op_233))));
+                const __m256d tmp_kernel_op_237 = _mm256_mul_pd(tmp_kernel_op_146,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_210,tmp_kernel_op_210,tmp_kernel_op_210,tmp_kernel_op_210)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY),_mm256_set_pd(tmp_kernel_op_236,tmp_kernel_op_236,tmp_kernel_op_236,tmp_kernel_op_236))));
+                const __m256d tmp_kernel_op_238 = _mm256_mul_pd(tmp_kernel_op_146,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_213,tmp_kernel_op_213,tmp_kernel_op_213,tmp_kernel_op_213)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY),_mm256_set_pd(tmp_kernel_op_236,tmp_kernel_op_236,tmp_kernel_op_236,tmp_kernel_op_236))));
+                const __m256d elMatDiag_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_114),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_115)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_114),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_115))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_115),_mm256_mul_pd(tmp_kernel_op_114,tmp_kernel_op_116)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_115),_mm256_mul_pd(tmp_kernel_op_114,tmp_kernel_op_116))))),_mm256_mul_pd(tmp_kernel_op_166,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_150),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_151)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_150),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_151))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_151),_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_152)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_151),_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_152)))))),_mm256_mul_pd(tmp_kernel_op_58,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_29,tmp_kernel_op_38),_mm256_mul_pd(tmp_kernel_op_32,tmp_kernel_op_39)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_29,tmp_kernel_op_38),_mm256_mul_pd(tmp_kernel_op_32,tmp_kernel_op_39))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_39),_mm256_mul_pd(tmp_kernel_op_38,tmp_kernel_op_40)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_39),_mm256_mul_pd(tmp_kernel_op_38,tmp_kernel_op_40)))))),_mm256_mul_pd(tmp_kernel_op_94,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_69,tmp_kernel_op_78),_mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_79)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_69,tmp_kernel_op_78),_mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_79))),_mm2
56_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_71,tmp_kernel_op_79),_mm256_mul_pd(tmp_kernel_op_78,tmp_kernel_op_80)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_71,tmp_kernel_op_79),_mm256_mul_pd(tmp_kernel_op_78,tmp_kernel_op_80))))));
+                const __m256d elMatDiag_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_174),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_175)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_174),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_175))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_175),_mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_174)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_175),_mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_174))))),_mm256_mul_pd(tmp_kernel_op_166,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_177),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_178)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_177),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_178))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_178),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_177)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_178),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_177)))))),_mm256_mul_pd(tmp_kernel_op_58,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_168,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_169,tmp_kernel_op_32)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_168,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_169,tmp_kernel_op_32))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_168,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_169,tmp_kernel_op_31)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_168,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_169,tmp_kernel_op_31)))))),_mm256_mul_pd(tmp_kernel_op_94,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_171,tmp_kernel_op_69),_mm256_mul_pd(tmp_kernel_op_172,tmp_kernel_op_72)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_171,tmp_kernel_op_69),_mm256_mul_pd(tmp_kernel_op_172,tmp_kernel_o
p_72))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_171,tmp_kernel_op_80),_mm256_mul_pd(tmp_kernel_op_172,tmp_kernel_op_71)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_171,tmp_kernel_op_80),_mm256_mul_pd(tmp_kernel_op_172,tmp_kernel_op_71))))));
+                const __m256d elMatDiag_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_186),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_187)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_186),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_187))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_187),_mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_186)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_187),_mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_186))))),_mm256_mul_pd(tmp_kernel_op_166,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_189),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_190)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_189),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_190))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_190),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_189)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_190),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_189)))))),_mm256_mul_pd(tmp_kernel_op_58,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_180,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_181,tmp_kernel_op_32)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_180,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_181,tmp_kernel_op_32))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_180,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_181,tmp_kernel_op_31)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_180,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_181,tmp_kernel_op_31)))))),_mm256_mul_pd(tmp_kernel_op_94,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_183,tmp_kernel_op_69),_mm256_mul_pd(tmp_kernel_op_184,tmp_kernel_op_72)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_183,tmp_kernel_op_69),_mm256_mul_pd(tmp_kernel_op_184,tmp_kernel_o
p_72))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_183,tmp_kernel_op_80),_mm256_mul_pd(tmp_kernel_op_184,tmp_kernel_op_71)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_183,tmp_kernel_op_80),_mm256_mul_pd(tmp_kernel_op_184,tmp_kernel_op_71))))));
+                const __m256d elMatDiag_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_205),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_208)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_205),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_208))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_208),_mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_205)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_208),_mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_205))))),_mm256_mul_pd(tmp_kernel_op_166,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_211),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_214)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_211),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_214))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_214),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_211)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_214),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_211)))))),_mm256_mul_pd(tmp_kernel_op_58,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_193,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_196,tmp_kernel_op_32)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_193,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_196,tmp_kernel_op_32))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_193,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_196,tmp_kernel_op_31)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_193,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_196,tmp_kernel_op_31)))))),_mm256_mul_pd(tmp_kernel_op_94,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_199,tmp_kernel_op_69),_mm256_mul_pd(tmp_kernel_op_202,tmp_kernel_op_72)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_199,tmp_kernel_op_69),_mm256_mul_pd(tmp_kernel_op_202,tmp_kernel_o
p_72))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_199,tmp_kernel_op_80),_mm256_mul_pd(tmp_kernel_op_202,tmp_kernel_op_71)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_199,tmp_kernel_op_80),_mm256_mul_pd(tmp_kernel_op_202,tmp_kernel_op_71))))));
+                const __m256d elMatDiag_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_222),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_223)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_222),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_223))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_223),_mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_222)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_223),_mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_222))))),_mm256_mul_pd(tmp_kernel_op_166,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_225),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_226)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_225),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_226))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_226),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_225)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_226),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_225)))))),_mm256_mul_pd(tmp_kernel_op_58,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_216,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_217,tmp_kernel_op_32)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_216,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_217,tmp_kernel_op_32))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_216,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_217,tmp_kernel_op_31)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_216,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_217,tmp_kernel_op_31)))))),_mm256_mul_pd(tmp_kernel_op_94,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_219,tmp_kernel_op_69),_mm256_mul_pd(tmp_kernel_op_220,tmp_kernel_op_72)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_219,tmp_kernel_op_69),_mm256_mul_pd(tmp_kernel_op_220,tmp_kernel_o
p_72))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_219,tmp_kernel_op_80),_mm256_mul_pd(tmp_kernel_op_220,tmp_kernel_op_71)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_219,tmp_kernel_op_80),_mm256_mul_pd(tmp_kernel_op_220,tmp_kernel_op_71))))));
+                const __m256d elMatDiag_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_234),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_235)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_234),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_235))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_235),_mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_234)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_235),_mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_234))))),_mm256_mul_pd(tmp_kernel_op_166,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_237),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_238)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_237),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_238))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_238),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_237)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_238),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_237)))))),_mm256_mul_pd(tmp_kernel_op_58,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_228,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_229,tmp_kernel_op_32)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_228,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_229,tmp_kernel_op_32))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_228,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_229,tmp_kernel_op_31)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_228,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_229,tmp_kernel_op_31)))))),_mm256_mul_pd(tmp_kernel_op_94,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_231,tmp_kernel_op_69),_mm256_mul_pd(tmp_kernel_op_232,tmp_kernel_op_72)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_231,tmp_kernel_op_69),_mm256_mul_pd(tmp_kernel_op_232,tmp_kernel_o
p_72))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_231,tmp_kernel_op_80),_mm256_mul_pd(tmp_kernel_op_232,tmp_kernel_op_71)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_231,tmp_kernel_op_80),_mm256_mul_pd(tmp_kernel_op_232,tmp_kernel_op_71))))));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t tmp_kernel_op_2 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_kernel_op_3 = -tmp_kernel_op_2;
+                const real_t tmp_kernel_op_4 = p_affine_0_0 - p_affine_2_0;
+                const real_t tmp_kernel_op_5 = -tmp_kernel_op_4;
+                const real_t tmp_kernel_op_6 = p_affine_0_0 + tmp_kernel_op_3*0.33333333333333331 + tmp_kernel_op_5*0.33333333333333331;
+                const real_t tmp_kernel_op_7 = (tmp_kernel_op_6*tmp_kernel_op_6);
+                const real_t tmp_kernel_op_8 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_kernel_op_9 = -tmp_kernel_op_8;
+                const real_t tmp_kernel_op_10 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_kernel_op_11 = -tmp_kernel_op_10;
+                const real_t tmp_kernel_op_12 = p_affine_0_1 + tmp_kernel_op_11*0.33333333333333331 + tmp_kernel_op_9*0.33333333333333331;
+                const real_t tmp_kernel_op_13 = (tmp_kernel_op_12*tmp_kernel_op_12);
+                const real_t tmp_kernel_op_14 = tmp_kernel_op_13 + tmp_kernel_op_7;
+                const real_t tmp_kernel_op_22 = pow(tmp_kernel_op_14, -0.50000000000000000)*tmp_kernel_op_21;
+                const real_t tmp_kernel_op_23 = tmp_kernel_op_22*tmp_kernel_op_6;
+                const real_t tmp_kernel_op_24 = pow(tmp_kernel_op_14, -1.5000000000000000);
+                const real_t tmp_kernel_op_27 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_6) - tmp_kernel_op_17*(tmp_kernel_op_12 + tmp_kernel_op_25));
+                const real_t tmp_kernel_op_28 = tmp_kernel_op_24*tmp_kernel_op_27*1.0;
+                const real_t tmp_kernel_op_29 = tmp_kernel_op_1*tmp_kernel_op_23 + tmp_kernel_op_13*tmp_kernel_op_28;
+                const real_t tmp_kernel_op_30 = tmp_kernel_op_12*tmp_kernel_op_22;
+                const real_t tmp_kernel_op_31 = -tmp_kernel_op_17*tmp_kernel_op_30 + tmp_kernel_op_24*tmp_kernel_op_27*tmp_kernel_op_7*1.0;
+                const real_t tmp_kernel_op_32 = tmp_kernel_op_12*tmp_kernel_op_24*tmp_kernel_op_27*tmp_kernel_op_6*1.0 + tmp_kernel_op_17*tmp_kernel_op_23;
+                const real_t tmp_kernel_op_33 = tmp_kernel_op_1*tmp_kernel_op_30 - tmp_kernel_op_12*tmp_kernel_op_28*tmp_kernel_op_6;
+                const real_t tmp_kernel_op_34 = 1.0 / (tmp_kernel_op_29*tmp_kernel_op_31 + tmp_kernel_op_32*tmp_kernel_op_33);
+                const real_t tmp_kernel_op_38 = tmp_kernel_op_34*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_37 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_37);
+                const real_t tmp_kernel_op_39 = tmp_kernel_op_34*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_37 + jac_affine_inv_1_0_GRAY*tmp_kernel_op_37);
+                const real_t tmp_kernel_op_40 = -tmp_kernel_op_33;
+                const real_t tmp_kernel_op_41 = -p_affine_0_0;
+                const real_t tmp_kernel_op_42 = tmp_kernel_op_2*0.33333333333333331 + tmp_kernel_op_4*0.33333333333333331 + tmp_kernel_op_41;
+                const real_t tmp_kernel_op_43 = (tmp_kernel_op_42*tmp_kernel_op_42);
+                const real_t tmp_kernel_op_44 = -p_affine_0_1;
+                const real_t tmp_kernel_op_45 = tmp_kernel_op_10*0.33333333333333331 + tmp_kernel_op_44 + tmp_kernel_op_8*0.33333333333333331;
+                const real_t tmp_kernel_op_46 = (tmp_kernel_op_45*tmp_kernel_op_45);
+                const real_t tmp_kernel_op_47 = tmp_kernel_op_43 + tmp_kernel_op_46;
+                const real_t tmp_kernel_op_50 = pow(tmp_kernel_op_47, -0.50000000000000000)*tmp_kernel_op_49;
+                const real_t tmp_kernel_op_51 = tmp_kernel_op_42*tmp_kernel_op_50;
+                const real_t tmp_kernel_op_52 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_42) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_45);
+                const real_t tmp_kernel_op_53 = pow(tmp_kernel_op_47, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_54 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+                const real_t tmp_kernel_op_55 = tmp_kernel_op_45*tmp_kernel_op_50;
+                const real_t tmp_kernel_op_56 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+                const real_t tmp_kernel_op_57 = tmp_kernel_op_42*tmp_kernel_op_45;
+                const real_t tmp_kernel_op_58 = abs_det_jac_affine_GRAY*-0.28125*abs((tmp_kernel_op_0*tmp_kernel_op_51 - tmp_kernel_op_46*tmp_kernel_op_54)*(tmp_kernel_op_16*tmp_kernel_op_55 + tmp_kernel_op_43*tmp_kernel_op_56) - (tmp_kernel_op_0*tmp_kernel_op_55 + tmp_kernel_op_54*tmp_kernel_op_57)*(tmp_kernel_op_16*tmp_kernel_op_51 - tmp_kernel_op_56*tmp_kernel_op_57));
+                const real_t tmp_kernel_op_59 = p_affine_0_0 + tmp_kernel_op_3*0.20000000000000001 + tmp_kernel_op_5*0.59999999999999998;
+                const real_t tmp_kernel_op_60 = (tmp_kernel_op_59*tmp_kernel_op_59);
+                const real_t tmp_kernel_op_61 = p_affine_0_1 + tmp_kernel_op_11*0.59999999999999998 + tmp_kernel_op_9*0.20000000000000001;
+                const real_t tmp_kernel_op_62 = (tmp_kernel_op_61*tmp_kernel_op_61);
+                const real_t tmp_kernel_op_63 = tmp_kernel_op_60 + tmp_kernel_op_62;
+                const real_t tmp_kernel_op_64 = tmp_kernel_op_21*pow(tmp_kernel_op_63, -0.50000000000000000);
+                const real_t tmp_kernel_op_65 = tmp_kernel_op_59*tmp_kernel_op_64;
+                const real_t tmp_kernel_op_66 = pow(tmp_kernel_op_63, -1.5000000000000000);
+                const real_t tmp_kernel_op_67 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_59) - tmp_kernel_op_17*(tmp_kernel_op_25 + tmp_kernel_op_61));
+                const real_t tmp_kernel_op_68 = tmp_kernel_op_66*tmp_kernel_op_67*1.0;
+                const real_t tmp_kernel_op_69 = tmp_kernel_op_1*tmp_kernel_op_65 + tmp_kernel_op_62*tmp_kernel_op_68;
+                const real_t tmp_kernel_op_70 = tmp_kernel_op_61*tmp_kernel_op_64;
+                const real_t tmp_kernel_op_71 = -tmp_kernel_op_17*tmp_kernel_op_70 + tmp_kernel_op_60*tmp_kernel_op_66*tmp_kernel_op_67*1.0;
+                const real_t tmp_kernel_op_72 = tmp_kernel_op_17*tmp_kernel_op_65 + tmp_kernel_op_59*tmp_kernel_op_61*tmp_kernel_op_66*tmp_kernel_op_67*1.0;
+                const real_t tmp_kernel_op_73 = tmp_kernel_op_1*tmp_kernel_op_70 - tmp_kernel_op_59*tmp_kernel_op_61*tmp_kernel_op_68;
+                const real_t tmp_kernel_op_74 = 1.0 / (tmp_kernel_op_69*tmp_kernel_op_71 + tmp_kernel_op_72*tmp_kernel_op_73);
+                const real_t tmp_kernel_op_78 = tmp_kernel_op_74*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_77 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_77);
+                const real_t tmp_kernel_op_79 = tmp_kernel_op_74*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_77 + jac_affine_inv_1_0_GRAY*tmp_kernel_op_77);
+                const real_t tmp_kernel_op_80 = -tmp_kernel_op_73;
+                const real_t tmp_kernel_op_81 = tmp_kernel_op_2*0.20000000000000001 + tmp_kernel_op_4*0.59999999999999998 + tmp_kernel_op_41;
+                const real_t tmp_kernel_op_82 = (tmp_kernel_op_81*tmp_kernel_op_81);
+                const real_t tmp_kernel_op_83 = tmp_kernel_op_10*0.59999999999999998 + tmp_kernel_op_44 + tmp_kernel_op_8*0.20000000000000001;
+                const real_t tmp_kernel_op_84 = (tmp_kernel_op_83*tmp_kernel_op_83);
+                const real_t tmp_kernel_op_85 = tmp_kernel_op_82 + tmp_kernel_op_84;
+                const real_t tmp_kernel_op_86 = tmp_kernel_op_49*pow(tmp_kernel_op_85, -0.50000000000000000);
+                const real_t tmp_kernel_op_87 = tmp_kernel_op_81*tmp_kernel_op_86;
+                const real_t tmp_kernel_op_88 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_81) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_83);
+                const real_t tmp_kernel_op_89 = pow(tmp_kernel_op_85, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_90 = tmp_kernel_op_89*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_88);
+                const real_t tmp_kernel_op_91 = tmp_kernel_op_83*tmp_kernel_op_86;
+                const real_t tmp_kernel_op_92 = tmp_kernel_op_89*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_88);
+                const real_t tmp_kernel_op_93 = tmp_kernel_op_81*tmp_kernel_op_83;
+                const real_t tmp_kernel_op_94 = abs_det_jac_affine_GRAY*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_87 - tmp_kernel_op_84*tmp_kernel_op_90)*(tmp_kernel_op_16*tmp_kernel_op_91 + tmp_kernel_op_82*tmp_kernel_op_92) - (tmp_kernel_op_0*tmp_kernel_op_91 + tmp_kernel_op_90*tmp_kernel_op_93)*(tmp_kernel_op_16*tmp_kernel_op_87 - tmp_kernel_op_92*tmp_kernel_op_93));
+                const real_t tmp_kernel_op_95 = p_affine_0_0 + tmp_kernel_op_3*0.59999999999999998 + tmp_kernel_op_5*0.20000000000000001;
+                const real_t tmp_kernel_op_96 = (tmp_kernel_op_95*tmp_kernel_op_95);
+                const real_t tmp_kernel_op_97 = p_affine_0_1 + tmp_kernel_op_11*0.20000000000000001 + tmp_kernel_op_9*0.59999999999999998;
+                const real_t tmp_kernel_op_98 = (tmp_kernel_op_97*tmp_kernel_op_97);
+                const real_t tmp_kernel_op_99 = tmp_kernel_op_96 + tmp_kernel_op_98;
+                const real_t tmp_kernel_op_100 = tmp_kernel_op_21*pow(tmp_kernel_op_99, -0.50000000000000000);
+                const real_t tmp_kernel_op_101 = tmp_kernel_op_100*tmp_kernel_op_95;
+                const real_t tmp_kernel_op_102 = pow(tmp_kernel_op_99, -1.5000000000000000);
+                const real_t tmp_kernel_op_103 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_95) - tmp_kernel_op_17*(tmp_kernel_op_25 + tmp_kernel_op_97));
+                const real_t tmp_kernel_op_104 = tmp_kernel_op_102*tmp_kernel_op_103*1.0;
+                const real_t tmp_kernel_op_105 = tmp_kernel_op_1*tmp_kernel_op_101 + tmp_kernel_op_104*tmp_kernel_op_98;
+                const real_t tmp_kernel_op_106 = tmp_kernel_op_100*tmp_kernel_op_97;
+                const real_t tmp_kernel_op_107 = tmp_kernel_op_102*tmp_kernel_op_103*tmp_kernel_op_96*1.0 - tmp_kernel_op_106*tmp_kernel_op_17;
+                const real_t tmp_kernel_op_108 = tmp_kernel_op_101*tmp_kernel_op_17 + tmp_kernel_op_102*tmp_kernel_op_103*tmp_kernel_op_95*tmp_kernel_op_97*1.0;
+                const real_t tmp_kernel_op_109 = tmp_kernel_op_1*tmp_kernel_op_106 - tmp_kernel_op_104*tmp_kernel_op_95*tmp_kernel_op_97;
+                const real_t tmp_kernel_op_110 = 1.0 / (tmp_kernel_op_105*tmp_kernel_op_107 + tmp_kernel_op_108*tmp_kernel_op_109);
+                const real_t tmp_kernel_op_114 = tmp_kernel_op_110*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_113 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_113);
+                const real_t tmp_kernel_op_115 = tmp_kernel_op_110*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_113 + jac_affine_inv_1_0_GRAY*tmp_kernel_op_113);
+                const real_t tmp_kernel_op_116 = -tmp_kernel_op_109;
+                const real_t tmp_kernel_op_117 = tmp_kernel_op_2*0.59999999999999998 + tmp_kernel_op_4*0.20000000000000001 + tmp_kernel_op_41;
+                const real_t tmp_kernel_op_118 = (tmp_kernel_op_117*tmp_kernel_op_117);
+                const real_t tmp_kernel_op_119 = tmp_kernel_op_10*0.20000000000000001 + tmp_kernel_op_44 + tmp_kernel_op_8*0.59999999999999998;
+                const real_t tmp_kernel_op_120 = (tmp_kernel_op_119*tmp_kernel_op_119);
+                const real_t tmp_kernel_op_121 = tmp_kernel_op_118 + tmp_kernel_op_120;
+                const real_t tmp_kernel_op_122 = pow(tmp_kernel_op_121, -0.50000000000000000)*tmp_kernel_op_49;
+                const real_t tmp_kernel_op_123 = tmp_kernel_op_117*tmp_kernel_op_122;
+                const real_t tmp_kernel_op_124 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_117) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_119);
+                const real_t tmp_kernel_op_125 = pow(tmp_kernel_op_121, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_126 = tmp_kernel_op_125*(radRayVertex + tmp_kernel_op_124*tmp_kernel_op_48);
+                const real_t tmp_kernel_op_127 = tmp_kernel_op_119*tmp_kernel_op_122;
+                const real_t tmp_kernel_op_128 = tmp_kernel_op_125*(radRayVertex + tmp_kernel_op_124*tmp_kernel_op_48);
+                const real_t tmp_kernel_op_129 = tmp_kernel_op_117*tmp_kernel_op_119;
+                const real_t tmp_kernel_op_130 = abs_det_jac_affine_GRAY*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_123 - tmp_kernel_op_120*tmp_kernel_op_126)*(tmp_kernel_op_118*tmp_kernel_op_128 + tmp_kernel_op_127*tmp_kernel_op_16) - (tmp_kernel_op_0*tmp_kernel_op_127 + tmp_kernel_op_126*tmp_kernel_op_129)*(tmp_kernel_op_123*tmp_kernel_op_16 - tmp_kernel_op_128*tmp_kernel_op_129));
+                const real_t tmp_kernel_op_131 = p_affine_0_0 + tmp_kernel_op_3*0.20000000000000001 + tmp_kernel_op_5*0.20000000000000001;
+                const real_t tmp_kernel_op_132 = (tmp_kernel_op_131*tmp_kernel_op_131);
+                const real_t tmp_kernel_op_133 = p_affine_0_1 + tmp_kernel_op_11*0.20000000000000001 + tmp_kernel_op_9*0.20000000000000001;
+                const real_t tmp_kernel_op_134 = (tmp_kernel_op_133*tmp_kernel_op_133);
+                const real_t tmp_kernel_op_135 = tmp_kernel_op_132 + tmp_kernel_op_134;
+                const real_t tmp_kernel_op_136 = pow(tmp_kernel_op_135, -0.50000000000000000)*tmp_kernel_op_21;
+                const real_t tmp_kernel_op_137 = tmp_kernel_op_131*tmp_kernel_op_136;
+                const real_t tmp_kernel_op_138 = pow(tmp_kernel_op_135, -1.5000000000000000);
+                const real_t tmp_kernel_op_139 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_131 + tmp_kernel_op_26) - tmp_kernel_op_17*(tmp_kernel_op_133 + tmp_kernel_op_25));
+                const real_t tmp_kernel_op_140 = tmp_kernel_op_138*tmp_kernel_op_139*1.0;
+                const real_t tmp_kernel_op_141 = tmp_kernel_op_1*tmp_kernel_op_137 + tmp_kernel_op_134*tmp_kernel_op_140;
+                const real_t tmp_kernel_op_142 = tmp_kernel_op_133*tmp_kernel_op_136;
+                const real_t tmp_kernel_op_143 = tmp_kernel_op_132*tmp_kernel_op_138*tmp_kernel_op_139*1.0 - tmp_kernel_op_142*tmp_kernel_op_17;
+                const real_t tmp_kernel_op_144 = tmp_kernel_op_131*tmp_kernel_op_133*tmp_kernel_op_138*tmp_kernel_op_139*1.0 + tmp_kernel_op_137*tmp_kernel_op_17;
+                const real_t tmp_kernel_op_145 = tmp_kernel_op_1*tmp_kernel_op_142 - tmp_kernel_op_131*tmp_kernel_op_133*tmp_kernel_op_140;
+                const real_t tmp_kernel_op_146 = 1.0 / (tmp_kernel_op_141*tmp_kernel_op_143 + tmp_kernel_op_144*tmp_kernel_op_145);
+                const real_t tmp_kernel_op_150 = tmp_kernel_op_146*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_149 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_149);
+                const real_t tmp_kernel_op_151 = tmp_kernel_op_146*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_149 + jac_affine_inv_1_0_GRAY*tmp_kernel_op_149);
+                const real_t tmp_kernel_op_152 = -tmp_kernel_op_145;
+                const real_t tmp_kernel_op_153 = tmp_kernel_op_2*0.20000000000000001 + tmp_kernel_op_4*0.20000000000000001 + tmp_kernel_op_41;
+                const real_t tmp_kernel_op_154 = (tmp_kernel_op_153*tmp_kernel_op_153);
+                const real_t tmp_kernel_op_155 = tmp_kernel_op_10*0.20000000000000001 + tmp_kernel_op_44 + tmp_kernel_op_8*0.20000000000000001;
+                const real_t tmp_kernel_op_156 = (tmp_kernel_op_155*tmp_kernel_op_155);
+                const real_t tmp_kernel_op_157 = tmp_kernel_op_154 + tmp_kernel_op_156;
+                const real_t tmp_kernel_op_158 = pow(tmp_kernel_op_157, -0.50000000000000000)*tmp_kernel_op_49;
+                const real_t tmp_kernel_op_159 = tmp_kernel_op_153*tmp_kernel_op_158;
+                const real_t tmp_kernel_op_160 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_153) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_155);
+                const real_t tmp_kernel_op_161 = pow(tmp_kernel_op_157, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_162 = tmp_kernel_op_161*(radRayVertex + tmp_kernel_op_160*tmp_kernel_op_48);
+                const real_t tmp_kernel_op_163 = tmp_kernel_op_155*tmp_kernel_op_158;
+                const real_t tmp_kernel_op_164 = tmp_kernel_op_161*(radRayVertex + tmp_kernel_op_160*tmp_kernel_op_48);
+                const real_t tmp_kernel_op_165 = tmp_kernel_op_153*tmp_kernel_op_155;
+                const real_t tmp_kernel_op_166 = abs_det_jac_affine_GRAY*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_159 - tmp_kernel_op_156*tmp_kernel_op_162)*(tmp_kernel_op_154*tmp_kernel_op_164 + tmp_kernel_op_16*tmp_kernel_op_163) - (tmp_kernel_op_0*tmp_kernel_op_163 + tmp_kernel_op_162*tmp_kernel_op_165)*(tmp_kernel_op_159*tmp_kernel_op_16 - tmp_kernel_op_164*tmp_kernel_op_165));
+                const real_t tmp_kernel_op_167 = tmp_kernel_op_34*(tmp_kernel_op_35 - 1.0);
+                const real_t tmp_kernel_op_168 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_167;
+                const real_t tmp_kernel_op_169 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_167;
+                const real_t tmp_kernel_op_170 = tmp_kernel_op_74*(tmp_kernel_op_75 - 1.0);
+                const real_t tmp_kernel_op_171 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_170;
+                const real_t tmp_kernel_op_172 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_170;
+                const real_t tmp_kernel_op_173 = tmp_kernel_op_110*(tmp_kernel_op_111 - 1.0);
+                const real_t tmp_kernel_op_174 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_173;
+                const real_t tmp_kernel_op_175 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_173;
+                const real_t tmp_kernel_op_176 = tmp_kernel_op_146*(tmp_kernel_op_147 - 1.0);
+                const real_t tmp_kernel_op_177 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_176;
+                const real_t tmp_kernel_op_178 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_176;
+                const real_t tmp_kernel_op_179 = tmp_kernel_op_34*(tmp_kernel_op_36 - 1.0);
+                const real_t tmp_kernel_op_180 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_179;
+                const real_t tmp_kernel_op_181 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_179;
+                const real_t tmp_kernel_op_182 = tmp_kernel_op_74*(tmp_kernel_op_76 - 1.0);
+                const real_t tmp_kernel_op_183 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_182;
+                const real_t tmp_kernel_op_184 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_182;
+                const real_t tmp_kernel_op_185 = tmp_kernel_op_110*(tmp_kernel_op_112 - 1.0);
+                const real_t tmp_kernel_op_186 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_185;
+                const real_t tmp_kernel_op_187 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_185;
+                const real_t tmp_kernel_op_188 = tmp_kernel_op_146*(tmp_kernel_op_148 - 1.0);
+                const real_t tmp_kernel_op_189 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_188;
+                const real_t tmp_kernel_op_190 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_188;
+                const real_t tmp_kernel_op_193 = tmp_kernel_op_34*(tmp_kernel_op_191 + tmp_kernel_op_192);
+                const real_t tmp_kernel_op_196 = tmp_kernel_op_34*(tmp_kernel_op_194 + tmp_kernel_op_195);
+                const real_t tmp_kernel_op_199 = tmp_kernel_op_74*(tmp_kernel_op_197 + tmp_kernel_op_198);
+                const real_t tmp_kernel_op_202 = tmp_kernel_op_74*(tmp_kernel_op_200 + tmp_kernel_op_201);
+                const real_t tmp_kernel_op_205 = tmp_kernel_op_110*(tmp_kernel_op_203 + tmp_kernel_op_204);
+                const real_t tmp_kernel_op_208 = tmp_kernel_op_110*(tmp_kernel_op_206 + tmp_kernel_op_207);
+                const real_t tmp_kernel_op_211 = tmp_kernel_op_146*(tmp_kernel_op_209 + tmp_kernel_op_210);
+                const real_t tmp_kernel_op_214 = tmp_kernel_op_146*(tmp_kernel_op_212 + tmp_kernel_op_213);
+                const real_t tmp_kernel_op_216 = tmp_kernel_op_34*(jac_affine_inv_1_1_GRAY*tmp_kernel_op_215 - tmp_kernel_op_191);
+                const real_t tmp_kernel_op_217 = tmp_kernel_op_34*(jac_affine_inv_1_0_GRAY*tmp_kernel_op_215 - tmp_kernel_op_194);
+                const real_t tmp_kernel_op_219 = tmp_kernel_op_74*(jac_affine_inv_1_1_GRAY*tmp_kernel_op_218 - tmp_kernel_op_197);
+                const real_t tmp_kernel_op_220 = tmp_kernel_op_74*(jac_affine_inv_1_0_GRAY*tmp_kernel_op_218 - tmp_kernel_op_200);
+                const real_t tmp_kernel_op_222 = tmp_kernel_op_110*(jac_affine_inv_1_1_GRAY*tmp_kernel_op_221 - tmp_kernel_op_203);
+                const real_t tmp_kernel_op_223 = tmp_kernel_op_110*(jac_affine_inv_1_0_GRAY*tmp_kernel_op_221 - tmp_kernel_op_206);
+                const real_t tmp_kernel_op_225 = tmp_kernel_op_146*(jac_affine_inv_1_1_GRAY*tmp_kernel_op_224 - tmp_kernel_op_209);
+                const real_t tmp_kernel_op_226 = tmp_kernel_op_146*(jac_affine_inv_1_0_GRAY*tmp_kernel_op_224 - tmp_kernel_op_212);
+                const real_t tmp_kernel_op_228 = tmp_kernel_op_34*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_227 - tmp_kernel_op_192);
+                const real_t tmp_kernel_op_229 = tmp_kernel_op_34*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_227 - tmp_kernel_op_195);
+                const real_t tmp_kernel_op_231 = tmp_kernel_op_74*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_230 - tmp_kernel_op_198);
+                const real_t tmp_kernel_op_232 = tmp_kernel_op_74*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_230 - tmp_kernel_op_201);
+                const real_t tmp_kernel_op_234 = tmp_kernel_op_110*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_233 - tmp_kernel_op_204);
+                const real_t tmp_kernel_op_235 = tmp_kernel_op_110*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_233 - tmp_kernel_op_207);
+                const real_t tmp_kernel_op_237 = tmp_kernel_op_146*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_236 - tmp_kernel_op_210);
+                const real_t tmp_kernel_op_238 = tmp_kernel_op_146*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_236 - tmp_kernel_op_213);
+                const real_t elMatDiag_0 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_114 + tmp_kernel_op_108*tmp_kernel_op_115)*(tmp_kernel_op_105*tmp_kernel_op_114 + tmp_kernel_op_108*tmp_kernel_op_115)) + ((tmp_kernel_op_107*tmp_kernel_op_115 + tmp_kernel_op_114*tmp_kernel_op_116)*(tmp_kernel_op_107*tmp_kernel_op_115 + tmp_kernel_op_114*tmp_kernel_op_116))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_150 + tmp_kernel_op_144*tmp_kernel_op_151)*(tmp_kernel_op_141*tmp_kernel_op_150 + tmp_kernel_op_144*tmp_kernel_op_151)) + ((tmp_kernel_op_143*tmp_kernel_op_151 + tmp_kernel_op_150*tmp_kernel_op_152)*(tmp_kernel_op_143*tmp_kernel_op_151 + tmp_kernel_op_150*tmp_kernel_op_152))) + tmp_kernel_op_58*(((tmp_kernel_op_29*tmp_kernel_op_38 + tmp_kernel_op_32*tmp_kernel_op_39)*(tmp_kernel_op_29*tmp_kernel_op_38 + tmp_kernel_op_32*tmp_kernel_op_39)) + ((tmp_kernel_op_31*tmp_kernel_op_39 + tmp_kernel_op_38*tmp_kernel_op_40)*(tmp_kernel_op_31*tmp_kernel_op_39 + tmp_kernel_op_38*tmp_kernel_op_40))) + tmp_kernel_op_94*(((tmp_kernel_op_69*tmp_kernel_op_78 + tmp_kernel_op_72*tmp_kernel_op_79)*(tmp_kernel_op_69*tmp_kernel_op_78 + tmp_kernel_op_72*tmp_kernel_op_79)) + ((tmp_kernel_op_71*tmp_kernel_op_79 + tmp_kernel_op_78*tmp_kernel_op_80)*(tmp_kernel_op_71*tmp_kernel_op_79 + tmp_kernel_op_78*tmp_kernel_op_80)));
+                const real_t elMatDiag_1 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_174 + tmp_kernel_op_108*tmp_kernel_op_175)*(tmp_kernel_op_105*tmp_kernel_op_174 + tmp_kernel_op_108*tmp_kernel_op_175)) + ((tmp_kernel_op_107*tmp_kernel_op_175 + tmp_kernel_op_116*tmp_kernel_op_174)*(tmp_kernel_op_107*tmp_kernel_op_175 + tmp_kernel_op_116*tmp_kernel_op_174))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_177 + tmp_kernel_op_144*tmp_kernel_op_178)*(tmp_kernel_op_141*tmp_kernel_op_177 + tmp_kernel_op_144*tmp_kernel_op_178)) + ((tmp_kernel_op_143*tmp_kernel_op_178 + tmp_kernel_op_152*tmp_kernel_op_177)*(tmp_kernel_op_143*tmp_kernel_op_178 + tmp_kernel_op_152*tmp_kernel_op_177))) + tmp_kernel_op_58*(((tmp_kernel_op_168*tmp_kernel_op_29 + tmp_kernel_op_169*tmp_kernel_op_32)*(tmp_kernel_op_168*tmp_kernel_op_29 + tmp_kernel_op_169*tmp_kernel_op_32)) + ((tmp_kernel_op_168*tmp_kernel_op_40 + tmp_kernel_op_169*tmp_kernel_op_31)*(tmp_kernel_op_168*tmp_kernel_op_40 + tmp_kernel_op_169*tmp_kernel_op_31))) + tmp_kernel_op_94*(((tmp_kernel_op_171*tmp_kernel_op_69 + tmp_kernel_op_172*tmp_kernel_op_72)*(tmp_kernel_op_171*tmp_kernel_op_69 + tmp_kernel_op_172*tmp_kernel_op_72)) + ((tmp_kernel_op_171*tmp_kernel_op_80 + tmp_kernel_op_172*tmp_kernel_op_71)*(tmp_kernel_op_171*tmp_kernel_op_80 + tmp_kernel_op_172*tmp_kernel_op_71)));
+                const real_t elMatDiag_2 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_186 + tmp_kernel_op_108*tmp_kernel_op_187)*(tmp_kernel_op_105*tmp_kernel_op_186 + tmp_kernel_op_108*tmp_kernel_op_187)) + ((tmp_kernel_op_107*tmp_kernel_op_187 + tmp_kernel_op_116*tmp_kernel_op_186)*(tmp_kernel_op_107*tmp_kernel_op_187 + tmp_kernel_op_116*tmp_kernel_op_186))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_189 + tmp_kernel_op_144*tmp_kernel_op_190)*(tmp_kernel_op_141*tmp_kernel_op_189 + tmp_kernel_op_144*tmp_kernel_op_190)) + ((tmp_kernel_op_143*tmp_kernel_op_190 + tmp_kernel_op_152*tmp_kernel_op_189)*(tmp_kernel_op_143*tmp_kernel_op_190 + tmp_kernel_op_152*tmp_kernel_op_189))) + tmp_kernel_op_58*(((tmp_kernel_op_180*tmp_kernel_op_29 + tmp_kernel_op_181*tmp_kernel_op_32)*(tmp_kernel_op_180*tmp_kernel_op_29 + tmp_kernel_op_181*tmp_kernel_op_32)) + ((tmp_kernel_op_180*tmp_kernel_op_40 + tmp_kernel_op_181*tmp_kernel_op_31)*(tmp_kernel_op_180*tmp_kernel_op_40 + tmp_kernel_op_181*tmp_kernel_op_31))) + tmp_kernel_op_94*(((tmp_kernel_op_183*tmp_kernel_op_69 + tmp_kernel_op_184*tmp_kernel_op_72)*(tmp_kernel_op_183*tmp_kernel_op_69 + tmp_kernel_op_184*tmp_kernel_op_72)) + ((tmp_kernel_op_183*tmp_kernel_op_80 + tmp_kernel_op_184*tmp_kernel_op_71)*(tmp_kernel_op_183*tmp_kernel_op_80 + tmp_kernel_op_184*tmp_kernel_op_71)));
+                const real_t elMatDiag_3 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_205 + tmp_kernel_op_108*tmp_kernel_op_208)*(tmp_kernel_op_105*tmp_kernel_op_205 + tmp_kernel_op_108*tmp_kernel_op_208)) + ((tmp_kernel_op_107*tmp_kernel_op_208 + tmp_kernel_op_116*tmp_kernel_op_205)*(tmp_kernel_op_107*tmp_kernel_op_208 + tmp_kernel_op_116*tmp_kernel_op_205))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_211 + tmp_kernel_op_144*tmp_kernel_op_214)*(tmp_kernel_op_141*tmp_kernel_op_211 + tmp_kernel_op_144*tmp_kernel_op_214)) + ((tmp_kernel_op_143*tmp_kernel_op_214 + tmp_kernel_op_152*tmp_kernel_op_211)*(tmp_kernel_op_143*tmp_kernel_op_214 + tmp_kernel_op_152*tmp_kernel_op_211))) + tmp_kernel_op_58*(((tmp_kernel_op_193*tmp_kernel_op_29 + tmp_kernel_op_196*tmp_kernel_op_32)*(tmp_kernel_op_193*tmp_kernel_op_29 + tmp_kernel_op_196*tmp_kernel_op_32)) + ((tmp_kernel_op_193*tmp_kernel_op_40 + tmp_kernel_op_196*tmp_kernel_op_31)*(tmp_kernel_op_193*tmp_kernel_op_40 + tmp_kernel_op_196*tmp_kernel_op_31))) + tmp_kernel_op_94*(((tmp_kernel_op_199*tmp_kernel_op_69 + tmp_kernel_op_202*tmp_kernel_op_72)*(tmp_kernel_op_199*tmp_kernel_op_69 + tmp_kernel_op_202*tmp_kernel_op_72)) + ((tmp_kernel_op_199*tmp_kernel_op_80 + tmp_kernel_op_202*tmp_kernel_op_71)*(tmp_kernel_op_199*tmp_kernel_op_80 + tmp_kernel_op_202*tmp_kernel_op_71)));
+                const real_t elMatDiag_4 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_222 + tmp_kernel_op_108*tmp_kernel_op_223)*(tmp_kernel_op_105*tmp_kernel_op_222 + tmp_kernel_op_108*tmp_kernel_op_223)) + ((tmp_kernel_op_107*tmp_kernel_op_223 + tmp_kernel_op_116*tmp_kernel_op_222)*(tmp_kernel_op_107*tmp_kernel_op_223 + tmp_kernel_op_116*tmp_kernel_op_222))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_225 + tmp_kernel_op_144*tmp_kernel_op_226)*(tmp_kernel_op_141*tmp_kernel_op_225 + tmp_kernel_op_144*tmp_kernel_op_226)) + ((tmp_kernel_op_143*tmp_kernel_op_226 + tmp_kernel_op_152*tmp_kernel_op_225)*(tmp_kernel_op_143*tmp_kernel_op_226 + tmp_kernel_op_152*tmp_kernel_op_225))) + tmp_kernel_op_58*(((tmp_kernel_op_216*tmp_kernel_op_29 + tmp_kernel_op_217*tmp_kernel_op_32)*(tmp_kernel_op_216*tmp_kernel_op_29 + tmp_kernel_op_217*tmp_kernel_op_32)) + ((tmp_kernel_op_216*tmp_kernel_op_40 + tmp_kernel_op_217*tmp_kernel_op_31)*(tmp_kernel_op_216*tmp_kernel_op_40 + tmp_kernel_op_217*tmp_kernel_op_31))) + tmp_kernel_op_94*(((tmp_kernel_op_219*tmp_kernel_op_69 + tmp_kernel_op_220*tmp_kernel_op_72)*(tmp_kernel_op_219*tmp_kernel_op_69 + tmp_kernel_op_220*tmp_kernel_op_72)) + ((tmp_kernel_op_219*tmp_kernel_op_80 + tmp_kernel_op_220*tmp_kernel_op_71)*(tmp_kernel_op_219*tmp_kernel_op_80 + tmp_kernel_op_220*tmp_kernel_op_71)));
+                const real_t elMatDiag_5 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_234 + tmp_kernel_op_108*tmp_kernel_op_235)*(tmp_kernel_op_105*tmp_kernel_op_234 + tmp_kernel_op_108*tmp_kernel_op_235)) + ((tmp_kernel_op_107*tmp_kernel_op_235 + tmp_kernel_op_116*tmp_kernel_op_234)*(tmp_kernel_op_107*tmp_kernel_op_235 + tmp_kernel_op_116*tmp_kernel_op_234))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_237 + tmp_kernel_op_144*tmp_kernel_op_238)*(tmp_kernel_op_141*tmp_kernel_op_237 + tmp_kernel_op_144*tmp_kernel_op_238)) + ((tmp_kernel_op_143*tmp_kernel_op_238 + tmp_kernel_op_152*tmp_kernel_op_237)*(tmp_kernel_op_143*tmp_kernel_op_238 + tmp_kernel_op_152*tmp_kernel_op_237))) + tmp_kernel_op_58*(((tmp_kernel_op_228*tmp_kernel_op_29 + tmp_kernel_op_229*tmp_kernel_op_32)*(tmp_kernel_op_228*tmp_kernel_op_29 + tmp_kernel_op_229*tmp_kernel_op_32)) + ((tmp_kernel_op_228*tmp_kernel_op_40 + tmp_kernel_op_229*tmp_kernel_op_31)*(tmp_kernel_op_228*tmp_kernel_op_40 + tmp_kernel_op_229*tmp_kernel_op_31))) + tmp_kernel_op_94*(((tmp_kernel_op_231*tmp_kernel_op_69 + tmp_kernel_op_232*tmp_kernel_op_72)*(tmp_kernel_op_231*tmp_kernel_op_69 + tmp_kernel_op_232*tmp_kernel_op_72)) + ((tmp_kernel_op_231*tmp_kernel_op_80 + tmp_kernel_op_232*tmp_kernel_op_71)*(tmp_kernel_op_231*tmp_kernel_op_80 + tmp_kernel_op_232*tmp_kernel_op_71)));
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       const real_t tmp_moved_constant_0 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_36;
+       const real_t tmp_moved_constant_1 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_35;
+       const real_t tmp_moved_constant_2 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_36;
+       const real_t tmp_moved_constant_3 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_35;
+       const real_t tmp_moved_constant_4 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_76;
+       const real_t tmp_moved_constant_5 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_75;
+       const real_t tmp_moved_constant_6 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_76;
+       const real_t tmp_moved_constant_7 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_75;
+       const real_t tmp_moved_constant_8 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_112;
+       const real_t tmp_moved_constant_9 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_111;
+       const real_t tmp_moved_constant_10 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_112;
+       const real_t tmp_moved_constant_11 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_111;
+       const real_t tmp_moved_constant_12 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_148;
+       const real_t tmp_moved_constant_13 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_147;
+       const real_t tmp_moved_constant_14 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_148;
+       const real_t tmp_moved_constant_15 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_147;
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d tmp_kernel_op_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_3 = _mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_4 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_5 = _mm256_mul_pd(tmp_kernel_op_4,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_6 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331)),_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331))),p_affine_0_0);
+                const __m256d tmp_kernel_op_7 = _mm256_mul_pd(tmp_kernel_op_6,tmp_kernel_op_6);
+                const __m256d tmp_kernel_op_8 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_9 = _mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_10 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_11 = _mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_12 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331)),_mm256_mul_pd(tmp_kernel_op_9,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331))),p_affine_0_1);
+                const __m256d tmp_kernel_op_13 = _mm256_mul_pd(tmp_kernel_op_12,tmp_kernel_op_12);
+                const __m256d tmp_kernel_op_14 = _mm256_add_pd(tmp_kernel_op_13,tmp_kernel_op_7);
+                const __m256d tmp_kernel_op_22 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_14)),_mm256_set_pd(tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21));
+                const __m256d tmp_kernel_op_23 = _mm256_mul_pd(tmp_kernel_op_22,tmp_kernel_op_6);
+                const __m256d tmp_kernel_op_24 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_14),_mm256_mul_pd(tmp_kernel_op_14,tmp_kernel_op_14));
+                const __m256d tmp_kernel_op_27 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26),tmp_kernel_op_6),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25),tmp_kernel_op_12),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17))),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_28 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_24,tmp_kernel_op_27),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_29 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_23,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(tmp_kernel_op_13,tmp_kernel_op_28));
+                const __m256d tmp_kernel_op_30 = _mm256_mul_pd(tmp_kernel_op_12,tmp_kernel_op_22);
+                const __m256d tmp_kernel_op_31 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_30,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_24,tmp_kernel_op_27),tmp_kernel_op_7),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_32 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_23,_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_12,tmp_kernel_op_24),tmp_kernel_op_27),tmp_kernel_op_6),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_33 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_30,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_12,tmp_kernel_op_28),tmp_kernel_op_6),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_34 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_29,tmp_kernel_op_31),_mm256_mul_pd(tmp_kernel_op_32,tmp_kernel_op_33)));
+                const __m256d tmp_kernel_op_38 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE),_mm256_set_pd(tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE),_mm256_set_pd(tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37))));
+                const __m256d tmp_kernel_op_39 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE),_mm256_set_pd(tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE),_mm256_set_pd(tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37,tmp_kernel_op_37))));
+                const __m256d tmp_kernel_op_40 = _mm256_mul_pd(tmp_kernel_op_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_41 = _mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_42 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331)),_mm256_mul_pd(tmp_kernel_op_4,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331))),tmp_kernel_op_41);
+                const __m256d tmp_kernel_op_43 = _mm256_mul_pd(tmp_kernel_op_42,tmp_kernel_op_42);
+                const __m256d tmp_kernel_op_44 = _mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_45 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.33333333333333331,0.33333333333333331,0.33333333333333331,0.33333333333333331))),tmp_kernel_op_44);
+                const __m256d tmp_kernel_op_46 = _mm256_mul_pd(tmp_kernel_op_45,tmp_kernel_op_45);
+                const __m256d tmp_kernel_op_47 = _mm256_add_pd(tmp_kernel_op_43,tmp_kernel_op_46);
+                const __m256d tmp_kernel_op_50 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_47)),_mm256_set_pd(tmp_kernel_op_49,tmp_kernel_op_49,tmp_kernel_op_49,tmp_kernel_op_49));
+                const __m256d tmp_kernel_op_51 = _mm256_mul_pd(tmp_kernel_op_42,tmp_kernel_op_50);
+                const __m256d tmp_kernel_op_52 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_45),_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_42),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_53 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_47),_mm256_mul_pd(tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_54 = _mm256_mul_pd(tmp_kernel_op_53,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_52,_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_55 = _mm256_mul_pd(tmp_kernel_op_45,tmp_kernel_op_50);
+                const __m256d tmp_kernel_op_56 = _mm256_mul_pd(tmp_kernel_op_53,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_52,_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_57 = _mm256_mul_pd(tmp_kernel_op_42,tmp_kernel_op_45);
+                const __m256d tmp_kernel_op_58 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(-0.28125,-0.28125,-0.28125,-0.28125),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_55,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(tmp_kernel_op_43,tmp_kernel_op_56)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_51,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_46,tmp_kernel_op_54),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_51,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_56,tmp_kernel_op_57),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_55,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_54,tmp_kernel_op_57))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_59 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),p_affine_0_0);
+                const __m256d tmp_kernel_op_60 = _mm256_mul_pd(tmp_kernel_op_59,tmp_kernel_op_59);
+                const __m256d tmp_kernel_op_61 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_9,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),p_affine_0_1);
+                const __m256d tmp_kernel_op_62 = _mm256_mul_pd(tmp_kernel_op_61,tmp_kernel_op_61);
+                const __m256d tmp_kernel_op_63 = _mm256_add_pd(tmp_kernel_op_60,tmp_kernel_op_62);
+                const __m256d tmp_kernel_op_64 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_63)),_mm256_set_pd(tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21));
+                const __m256d tmp_kernel_op_65 = _mm256_mul_pd(tmp_kernel_op_59,tmp_kernel_op_64);
+                const __m256d tmp_kernel_op_66 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_63),_mm256_mul_pd(tmp_kernel_op_63,tmp_kernel_op_63));
+                const __m256d tmp_kernel_op_67 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26),tmp_kernel_op_59),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25),tmp_kernel_op_61),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17))),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_68 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_66,tmp_kernel_op_67),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_69 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_65,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(tmp_kernel_op_62,tmp_kernel_op_68));
+                const __m256d tmp_kernel_op_70 = _mm256_mul_pd(tmp_kernel_op_61,tmp_kernel_op_64);
+                const __m256d tmp_kernel_op_71 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_70,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_60,tmp_kernel_op_66),tmp_kernel_op_67),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_72 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_65,_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_59,tmp_kernel_op_61),tmp_kernel_op_66),tmp_kernel_op_67),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_73 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_70,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_59,tmp_kernel_op_61),tmp_kernel_op_68),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_74 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_69,tmp_kernel_op_71),_mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_73)));
+                const __m256d tmp_kernel_op_78 = _mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE),_mm256_set_pd(tmp_kernel_op_77,tmp_kernel_op_77,tmp_kernel_op_77,tmp_kernel_op_77)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE),_mm256_set_pd(tmp_kernel_op_77,tmp_kernel_op_77,tmp_kernel_op_77,tmp_kernel_op_77))));
+                const __m256d tmp_kernel_op_79 = _mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE),_mm256_set_pd(tmp_kernel_op_77,tmp_kernel_op_77,tmp_kernel_op_77,tmp_kernel_op_77)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE),_mm256_set_pd(tmp_kernel_op_77,tmp_kernel_op_77,tmp_kernel_op_77,tmp_kernel_op_77))));
+                const __m256d tmp_kernel_op_80 = _mm256_mul_pd(tmp_kernel_op_73,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_81 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_4,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),tmp_kernel_op_41);
+                const __m256d tmp_kernel_op_82 = _mm256_mul_pd(tmp_kernel_op_81,tmp_kernel_op_81);
+                const __m256d tmp_kernel_op_83 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),tmp_kernel_op_44);
+                const __m256d tmp_kernel_op_84 = _mm256_mul_pd(tmp_kernel_op_83,tmp_kernel_op_83);
+                const __m256d tmp_kernel_op_85 = _mm256_add_pd(tmp_kernel_op_82,tmp_kernel_op_84);
+                const __m256d tmp_kernel_op_86 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_85)),_mm256_set_pd(tmp_kernel_op_49,tmp_kernel_op_49,tmp_kernel_op_49,tmp_kernel_op_49));
+                const __m256d tmp_kernel_op_87 = _mm256_mul_pd(tmp_kernel_op_81,tmp_kernel_op_86);
+                const __m256d tmp_kernel_op_88 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_83),_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_81),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_89 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_85),_mm256_mul_pd(tmp_kernel_op_85,tmp_kernel_op_85)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_90 = _mm256_mul_pd(tmp_kernel_op_89,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_88,_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_91 = _mm256_mul_pd(tmp_kernel_op_83,tmp_kernel_op_86);
+                const __m256d tmp_kernel_op_92 = _mm256_mul_pd(tmp_kernel_op_89,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_88,_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_93 = _mm256_mul_pd(tmp_kernel_op_81,tmp_kernel_op_83);
+                const __m256d tmp_kernel_op_94 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.26041666666666669,0.26041666666666669,0.26041666666666669,0.26041666666666669),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_91,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(tmp_kernel_op_82,tmp_kernel_op_92)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_87,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_84,tmp_kernel_op_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_87,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_92,tmp_kernel_op_93),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_91,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_90,tmp_kernel_op_93))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_95 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),p_affine_0_0);
+                const __m256d tmp_kernel_op_96 = _mm256_mul_pd(tmp_kernel_op_95,tmp_kernel_op_95);
+                const __m256d tmp_kernel_op_97 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_9,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),p_affine_0_1);
+                const __m256d tmp_kernel_op_98 = _mm256_mul_pd(tmp_kernel_op_97,tmp_kernel_op_97);
+                const __m256d tmp_kernel_op_99 = _mm256_add_pd(tmp_kernel_op_96,tmp_kernel_op_98);
+                const __m256d tmp_kernel_op_100 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_99)),_mm256_set_pd(tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21));
+                const __m256d tmp_kernel_op_101 = _mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_95);
+                const __m256d tmp_kernel_op_102 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_99),_mm256_mul_pd(tmp_kernel_op_99,tmp_kernel_op_99));
+                const __m256d tmp_kernel_op_103 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26),tmp_kernel_op_95),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25),tmp_kernel_op_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17))),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_104 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_102,tmp_kernel_op_103),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_105 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_101,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(tmp_kernel_op_104,tmp_kernel_op_98));
+                const __m256d tmp_kernel_op_106 = _mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_97);
+                const __m256d tmp_kernel_op_107 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_106,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_102,tmp_kernel_op_103),tmp_kernel_op_96),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_108 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_101,_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_102,tmp_kernel_op_103),tmp_kernel_op_95),tmp_kernel_op_97),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_109 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_106,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_104,tmp_kernel_op_95),tmp_kernel_op_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_110 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_107),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_109)));
+                const __m256d tmp_kernel_op_114 = _mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE),_mm256_set_pd(tmp_kernel_op_113,tmp_kernel_op_113,tmp_kernel_op_113,tmp_kernel_op_113)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE),_mm256_set_pd(tmp_kernel_op_113,tmp_kernel_op_113,tmp_kernel_op_113,tmp_kernel_op_113))));
+                const __m256d tmp_kernel_op_115 = _mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE),_mm256_set_pd(tmp_kernel_op_113,tmp_kernel_op_113,tmp_kernel_op_113,tmp_kernel_op_113)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE),_mm256_set_pd(tmp_kernel_op_113,tmp_kernel_op_113,tmp_kernel_op_113,tmp_kernel_op_113))));
+                const __m256d tmp_kernel_op_116 = _mm256_mul_pd(tmp_kernel_op_109,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_117 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_4,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),tmp_kernel_op_41);
+                const __m256d tmp_kernel_op_118 = _mm256_mul_pd(tmp_kernel_op_117,tmp_kernel_op_117);
+                const __m256d tmp_kernel_op_119 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.59999999999999998,0.59999999999999998,0.59999999999999998,0.59999999999999998))),tmp_kernel_op_44);
+                const __m256d tmp_kernel_op_120 = _mm256_mul_pd(tmp_kernel_op_119,tmp_kernel_op_119);
+                const __m256d tmp_kernel_op_121 = _mm256_add_pd(tmp_kernel_op_118,tmp_kernel_op_120);
+                const __m256d tmp_kernel_op_122 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_121)),_mm256_set_pd(tmp_kernel_op_49,tmp_kernel_op_49,tmp_kernel_op_49,tmp_kernel_op_49));
+                const __m256d tmp_kernel_op_123 = _mm256_mul_pd(tmp_kernel_op_117,tmp_kernel_op_122);
+                const __m256d tmp_kernel_op_124 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_119),_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_117),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_125 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_121),_mm256_mul_pd(tmp_kernel_op_121,tmp_kernel_op_121)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_126 = _mm256_mul_pd(tmp_kernel_op_125,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_124,_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_127 = _mm256_mul_pd(tmp_kernel_op_119,tmp_kernel_op_122);
+                const __m256d tmp_kernel_op_128 = _mm256_mul_pd(tmp_kernel_op_125,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_124,_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_129 = _mm256_mul_pd(tmp_kernel_op_117,tmp_kernel_op_119);
+                const __m256d tmp_kernel_op_130 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.26041666666666669,0.26041666666666669,0.26041666666666669,0.26041666666666669),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_127,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(tmp_kernel_op_118,tmp_kernel_op_128)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_123,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_120,tmp_kernel_op_126),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_123,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_128,tmp_kernel_op_129),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_127,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_126,tmp_kernel_op_129))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_131 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001))),p_affine_0_0);
+                const __m256d tmp_kernel_op_132 = _mm256_mul_pd(tmp_kernel_op_131,tmp_kernel_op_131);
+                const __m256d tmp_kernel_op_133 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_9,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001))),p_affine_0_1);
+                const __m256d tmp_kernel_op_134 = _mm256_mul_pd(tmp_kernel_op_133,tmp_kernel_op_133);
+                const __m256d tmp_kernel_op_135 = _mm256_add_pd(tmp_kernel_op_132,tmp_kernel_op_134);
+                const __m256d tmp_kernel_op_136 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_135)),_mm256_set_pd(tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21,tmp_kernel_op_21));
+                const __m256d tmp_kernel_op_137 = _mm256_mul_pd(tmp_kernel_op_131,tmp_kernel_op_136);
+                const __m256d tmp_kernel_op_138 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_135),_mm256_mul_pd(tmp_kernel_op_135,tmp_kernel_op_135));
+                const __m256d tmp_kernel_op_139 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26,tmp_kernel_op_26),tmp_kernel_op_131),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25,tmp_kernel_op_25),tmp_kernel_op_133),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17))),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_140 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_138,tmp_kernel_op_139),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_141 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_137,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(tmp_kernel_op_134,tmp_kernel_op_140));
+                const __m256d tmp_kernel_op_142 = _mm256_mul_pd(tmp_kernel_op_133,tmp_kernel_op_136);
+                const __m256d tmp_kernel_op_143 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_142,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_132,tmp_kernel_op_138),tmp_kernel_op_139),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_144 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_137,_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_131,tmp_kernel_op_133),tmp_kernel_op_138),tmp_kernel_op_139),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_145 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_142,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_131,tmp_kernel_op_133),tmp_kernel_op_140),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_146 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_143),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_145)));
+                const __m256d tmp_kernel_op_150 = _mm256_mul_pd(tmp_kernel_op_146,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE),_mm256_set_pd(tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE),_mm256_set_pd(tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149))));
+                const __m256d tmp_kernel_op_151 = _mm256_mul_pd(tmp_kernel_op_146,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE),_mm256_set_pd(tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE),_mm256_set_pd(tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149))));
+                const __m256d tmp_kernel_op_152 = _mm256_mul_pd(tmp_kernel_op_145,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_153 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_4,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001))),tmp_kernel_op_41);
+                const __m256d tmp_kernel_op_154 = _mm256_mul_pd(tmp_kernel_op_153,tmp_kernel_op_153);
+                const __m256d tmp_kernel_op_155 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.20000000000000001,0.20000000000000001,0.20000000000000001,0.20000000000000001))),tmp_kernel_op_44);
+                const __m256d tmp_kernel_op_156 = _mm256_mul_pd(tmp_kernel_op_155,tmp_kernel_op_155);
+                const __m256d tmp_kernel_op_157 = _mm256_add_pd(tmp_kernel_op_154,tmp_kernel_op_156);
+                const __m256d tmp_kernel_op_158 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_157)),_mm256_set_pd(tmp_kernel_op_49,tmp_kernel_op_49,tmp_kernel_op_49,tmp_kernel_op_49));
+                const __m256d tmp_kernel_op_159 = _mm256_mul_pd(tmp_kernel_op_153,tmp_kernel_op_158);
+                const __m256d tmp_kernel_op_160 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_155),_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_153),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_161 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_157),_mm256_mul_pd(tmp_kernel_op_157,tmp_kernel_op_157)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_162 = _mm256_mul_pd(tmp_kernel_op_161,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_160,_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_163 = _mm256_mul_pd(tmp_kernel_op_155,tmp_kernel_op_158);
+                const __m256d tmp_kernel_op_164 = _mm256_mul_pd(tmp_kernel_op_161,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_160,_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_165 = _mm256_mul_pd(tmp_kernel_op_153,tmp_kernel_op_155);
+                const __m256d tmp_kernel_op_166 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.26041666666666669,0.26041666666666669,0.26041666666666669,0.26041666666666669),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_163,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(tmp_kernel_op_154,tmp_kernel_op_164)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_159,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_156,tmp_kernel_op_162),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_159,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_164,tmp_kernel_op_165),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_163,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_162,tmp_kernel_op_165))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_167 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_35,tmp_kernel_op_35,tmp_kernel_op_35,tmp_kernel_op_35)));
+                const __m256d tmp_kernel_op_168 = _mm256_mul_pd(tmp_kernel_op_167,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE));
+                const __m256d tmp_kernel_op_169 = _mm256_mul_pd(tmp_kernel_op_167,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE));
+                const __m256d tmp_kernel_op_170 = _mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_75,tmp_kernel_op_75,tmp_kernel_op_75,tmp_kernel_op_75)));
+                const __m256d tmp_kernel_op_171 = _mm256_mul_pd(tmp_kernel_op_170,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE));
+                const __m256d tmp_kernel_op_172 = _mm256_mul_pd(tmp_kernel_op_170,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE));
+                const __m256d tmp_kernel_op_173 = _mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_111,tmp_kernel_op_111,tmp_kernel_op_111,tmp_kernel_op_111)));
+                const __m256d tmp_kernel_op_174 = _mm256_mul_pd(tmp_kernel_op_173,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE));
+                const __m256d tmp_kernel_op_175 = _mm256_mul_pd(tmp_kernel_op_173,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE));
+                const __m256d tmp_kernel_op_176 = _mm256_mul_pd(tmp_kernel_op_146,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_147,tmp_kernel_op_147,tmp_kernel_op_147,tmp_kernel_op_147)));
+                const __m256d tmp_kernel_op_177 = _mm256_mul_pd(tmp_kernel_op_176,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE));
+                const __m256d tmp_kernel_op_178 = _mm256_mul_pd(tmp_kernel_op_176,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE));
+                const __m256d tmp_kernel_op_179 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_36,tmp_kernel_op_36,tmp_kernel_op_36,tmp_kernel_op_36)));
+                const __m256d tmp_kernel_op_180 = _mm256_mul_pd(tmp_kernel_op_179,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE));
+                const __m256d tmp_kernel_op_181 = _mm256_mul_pd(tmp_kernel_op_179,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE));
+                const __m256d tmp_kernel_op_182 = _mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_76,tmp_kernel_op_76,tmp_kernel_op_76,tmp_kernel_op_76)));
+                const __m256d tmp_kernel_op_183 = _mm256_mul_pd(tmp_kernel_op_182,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE));
+                const __m256d tmp_kernel_op_184 = _mm256_mul_pd(tmp_kernel_op_182,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE));
+                const __m256d tmp_kernel_op_185 = _mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_112,tmp_kernel_op_112,tmp_kernel_op_112,tmp_kernel_op_112)));
+                const __m256d tmp_kernel_op_186 = _mm256_mul_pd(tmp_kernel_op_185,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE));
+                const __m256d tmp_kernel_op_187 = _mm256_mul_pd(tmp_kernel_op_185,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE));
+                const __m256d tmp_kernel_op_188 = _mm256_mul_pd(tmp_kernel_op_146,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_148,tmp_kernel_op_148,tmp_kernel_op_148,tmp_kernel_op_148)));
+                const __m256d tmp_kernel_op_189 = _mm256_mul_pd(tmp_kernel_op_188,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE));
+                const __m256d tmp_kernel_op_190 = _mm256_mul_pd(tmp_kernel_op_188,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE));
+                const __m256d tmp_kernel_op_193 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_set_pd(tmp_moved_constant_0,tmp_moved_constant_0,tmp_moved_constant_0,tmp_moved_constant_0),_mm256_set_pd(tmp_moved_constant_1,tmp_moved_constant_1,tmp_moved_constant_1,tmp_moved_constant_1)));
+                const __m256d tmp_kernel_op_196 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_set_pd(tmp_moved_constant_2,tmp_moved_constant_2,tmp_moved_constant_2,tmp_moved_constant_2),_mm256_set_pd(tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3)));
+                const __m256d tmp_kernel_op_199 = _mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_set_pd(tmp_moved_constant_4,tmp_moved_constant_4,tmp_moved_constant_4,tmp_moved_constant_4),_mm256_set_pd(tmp_moved_constant_5,tmp_moved_constant_5,tmp_moved_constant_5,tmp_moved_constant_5)));
+                const __m256d tmp_kernel_op_202 = _mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_set_pd(tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6),_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7)));
+                const __m256d tmp_kernel_op_205 = _mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8),_mm256_set_pd(tmp_moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9)));
+                const __m256d tmp_kernel_op_208 = _mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_set_pd(tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10),_mm256_set_pd(tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11)));
+                const __m256d tmp_kernel_op_211 = _mm256_mul_pd(tmp_kernel_op_146,_mm256_add_pd(_mm256_set_pd(tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12),_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)));
+                const __m256d tmp_kernel_op_214 = _mm256_mul_pd(tmp_kernel_op_146,_mm256_add_pd(_mm256_set_pd(tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14),_mm256_set_pd(tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15)));
+                const __m256d tmp_kernel_op_216 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_0,tmp_moved_constant_0,tmp_moved_constant_0,tmp_moved_constant_0)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE),_mm256_set_pd(tmp_kernel_op_215,tmp_kernel_op_215,tmp_kernel_op_215,tmp_kernel_op_215))));
+                const __m256d tmp_kernel_op_217 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_2,tmp_moved_constant_2,tmp_moved_constant_2,tmp_moved_constant_2)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE),_mm256_set_pd(tmp_kernel_op_215,tmp_kernel_op_215,tmp_kernel_op_215,tmp_kernel_op_215))));
+                const __m256d tmp_kernel_op_219 = _mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_4,tmp_moved_constant_4,tmp_moved_constant_4,tmp_moved_constant_4)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE),_mm256_set_pd(tmp_kernel_op_218,tmp_kernel_op_218,tmp_kernel_op_218,tmp_kernel_op_218))));
+                const __m256d tmp_kernel_op_220 = _mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE),_mm256_set_pd(tmp_kernel_op_218,tmp_kernel_op_218,tmp_kernel_op_218,tmp_kernel_op_218))));
+                const __m256d tmp_kernel_op_222 = _mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE),_mm256_set_pd(tmp_kernel_op_221,tmp_kernel_op_221,tmp_kernel_op_221,tmp_kernel_op_221))));
+                const __m256d tmp_kernel_op_223 = _mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE),_mm256_set_pd(tmp_kernel_op_221,tmp_kernel_op_221,tmp_kernel_op_221,tmp_kernel_op_221))));
+                const __m256d tmp_kernel_op_225 = _mm256_mul_pd(tmp_kernel_op_146,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE),_mm256_set_pd(tmp_kernel_op_224,tmp_kernel_op_224,tmp_kernel_op_224,tmp_kernel_op_224))));
+                const __m256d tmp_kernel_op_226 = _mm256_mul_pd(tmp_kernel_op_146,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE),_mm256_set_pd(tmp_kernel_op_224,tmp_kernel_op_224,tmp_kernel_op_224,tmp_kernel_op_224))));
+                const __m256d tmp_kernel_op_228 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_1,tmp_moved_constant_1,tmp_moved_constant_1,tmp_moved_constant_1)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE),_mm256_set_pd(tmp_kernel_op_227,tmp_kernel_op_227,tmp_kernel_op_227,tmp_kernel_op_227))));
+                const __m256d tmp_kernel_op_229 = _mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE),_mm256_set_pd(tmp_kernel_op_227,tmp_kernel_op_227,tmp_kernel_op_227,tmp_kernel_op_227))));
+                const __m256d tmp_kernel_op_231 = _mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_5,tmp_moved_constant_5,tmp_moved_constant_5,tmp_moved_constant_5)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE),_mm256_set_pd(tmp_kernel_op_230,tmp_kernel_op_230,tmp_kernel_op_230,tmp_kernel_op_230))));
+                const __m256d tmp_kernel_op_232 = _mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE),_mm256_set_pd(tmp_kernel_op_230,tmp_kernel_op_230,tmp_kernel_op_230,tmp_kernel_op_230))));
+                const __m256d tmp_kernel_op_234 = _mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE),_mm256_set_pd(tmp_kernel_op_233,tmp_kernel_op_233,tmp_kernel_op_233,tmp_kernel_op_233))));
+                const __m256d tmp_kernel_op_235 = _mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE),_mm256_set_pd(tmp_kernel_op_233,tmp_kernel_op_233,tmp_kernel_op_233,tmp_kernel_op_233))));
+                const __m256d tmp_kernel_op_237 = _mm256_mul_pd(tmp_kernel_op_146,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE),_mm256_set_pd(tmp_kernel_op_236,tmp_kernel_op_236,tmp_kernel_op_236,tmp_kernel_op_236))));
+                const __m256d tmp_kernel_op_238 = _mm256_mul_pd(tmp_kernel_op_146,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE),_mm256_set_pd(tmp_kernel_op_236,tmp_kernel_op_236,tmp_kernel_op_236,tmp_kernel_op_236))));
+                const __m256d elMatDiag_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_114),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_115)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_114),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_115))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_115),_mm256_mul_pd(tmp_kernel_op_114,tmp_kernel_op_116)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_115),_mm256_mul_pd(tmp_kernel_op_114,tmp_kernel_op_116))))),_mm256_mul_pd(tmp_kernel_op_166,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_150),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_151)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_150),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_151))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_151),_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_152)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_151),_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_152)))))),_mm256_mul_pd(tmp_kernel_op_58,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_29,tmp_kernel_op_38),_mm256_mul_pd(tmp_kernel_op_32,tmp_kernel_op_39)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_29,tmp_kernel_op_38),_mm256_mul_pd(tmp_kernel_op_32,tmp_kernel_op_39))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_39),_mm256_mul_pd(tmp_kernel_op_38,tmp_kernel_op_40)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_39),_mm256_mul_pd(tmp_kernel_op_38,tmp_kernel_op_40)))))),_mm256_mul_pd(tmp_kernel_op_94,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_69,tmp_kernel_op_78),_mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_79)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_69,tmp_kernel_op_78),_mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_79))),_mm2
56_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_71,tmp_kernel_op_79),_mm256_mul_pd(tmp_kernel_op_78,tmp_kernel_op_80)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_71,tmp_kernel_op_79),_mm256_mul_pd(tmp_kernel_op_78,tmp_kernel_op_80))))));
+                const __m256d elMatDiag_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_174),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_175)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_174),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_175))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_175),_mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_174)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_175),_mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_174))))),_mm256_mul_pd(tmp_kernel_op_166,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_177),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_178)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_177),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_178))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_178),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_177)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_178),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_177)))))),_mm256_mul_pd(tmp_kernel_op_58,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_168,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_169,tmp_kernel_op_32)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_168,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_169,tmp_kernel_op_32))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_168,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_169,tmp_kernel_op_31)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_168,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_169,tmp_kernel_op_31)))))),_mm256_mul_pd(tmp_kernel_op_94,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_171,tmp_kernel_op_69),_mm256_mul_pd(tmp_kernel_op_172,tmp_kernel_op_72)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_171,tmp_kernel_op_69),_mm256_mul_pd(tmp_kernel_op_172,tmp_kernel_o
p_72))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_171,tmp_kernel_op_80),_mm256_mul_pd(tmp_kernel_op_172,tmp_kernel_op_71)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_171,tmp_kernel_op_80),_mm256_mul_pd(tmp_kernel_op_172,tmp_kernel_op_71))))));
+                const __m256d elMatDiag_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_186),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_187)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_186),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_187))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_187),_mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_186)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_187),_mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_186))))),_mm256_mul_pd(tmp_kernel_op_166,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_189),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_190)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_189),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_190))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_190),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_189)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_190),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_189)))))),_mm256_mul_pd(tmp_kernel_op_58,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_180,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_181,tmp_kernel_op_32)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_180,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_181,tmp_kernel_op_32))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_180,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_181,tmp_kernel_op_31)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_180,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_181,tmp_kernel_op_31)))))),_mm256_mul_pd(tmp_kernel_op_94,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_183,tmp_kernel_op_69),_mm256_mul_pd(tmp_kernel_op_184,tmp_kernel_op_72)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_183,tmp_kernel_op_69),_mm256_mul_pd(tmp_kernel_op_184,tmp_kernel_o
p_72))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_183,tmp_kernel_op_80),_mm256_mul_pd(tmp_kernel_op_184,tmp_kernel_op_71)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_183,tmp_kernel_op_80),_mm256_mul_pd(tmp_kernel_op_184,tmp_kernel_op_71))))));
+                const __m256d elMatDiag_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_205),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_208)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_205),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_208))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_208),_mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_205)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_208),_mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_205))))),_mm256_mul_pd(tmp_kernel_op_166,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_211),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_214)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_211),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_214))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_214),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_211)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_214),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_211)))))),_mm256_mul_pd(tmp_kernel_op_58,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_193,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_196,tmp_kernel_op_32)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_193,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_196,tmp_kernel_op_32))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_193,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_196,tmp_kernel_op_31)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_193,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_196,tmp_kernel_op_31)))))),_mm256_mul_pd(tmp_kernel_op_94,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_199,tmp_kernel_op_69),_mm256_mul_pd(tmp_kernel_op_202,tmp_kernel_op_72)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_199,tmp_kernel_op_69),_mm256_mul_pd(tmp_kernel_op_202,tmp_kernel_o
p_72))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_199,tmp_kernel_op_80),_mm256_mul_pd(tmp_kernel_op_202,tmp_kernel_op_71)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_199,tmp_kernel_op_80),_mm256_mul_pd(tmp_kernel_op_202,tmp_kernel_op_71))))));
+                const __m256d elMatDiag_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_222),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_223)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_222),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_223))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_223),_mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_222)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_223),_mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_222))))),_mm256_mul_pd(tmp_kernel_op_166,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_225),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_226)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_225),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_226))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_226),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_225)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_226),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_225)))))),_mm256_mul_pd(tmp_kernel_op_58,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_216,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_217,tmp_kernel_op_32)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_216,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_217,tmp_kernel_op_32))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_216,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_217,tmp_kernel_op_31)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_216,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_217,tmp_kernel_op_31)))))),_mm256_mul_pd(tmp_kernel_op_94,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_219,tmp_kernel_op_69),_mm256_mul_pd(tmp_kernel_op_220,tmp_kernel_op_72)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_219,tmp_kernel_op_69),_mm256_mul_pd(tmp_kernel_op_220,tmp_kernel_o
p_72))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_219,tmp_kernel_op_80),_mm256_mul_pd(tmp_kernel_op_220,tmp_kernel_op_71)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_219,tmp_kernel_op_80),_mm256_mul_pd(tmp_kernel_op_220,tmp_kernel_op_71))))));
+                const __m256d elMatDiag_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_234),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_235)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_234),_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_235))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_235),_mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_234)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_235),_mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_234))))),_mm256_mul_pd(tmp_kernel_op_166,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_237),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_238)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_237),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_238))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_238),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_237)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_238),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_237)))))),_mm256_mul_pd(tmp_kernel_op_58,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_228,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_229,tmp_kernel_op_32)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_228,tmp_kernel_op_29),_mm256_mul_pd(tmp_kernel_op_229,tmp_kernel_op_32))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_228,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_229,tmp_kernel_op_31)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_228,tmp_kernel_op_40),_mm256_mul_pd(tmp_kernel_op_229,tmp_kernel_op_31)))))),_mm256_mul_pd(tmp_kernel_op_94,_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_231,tmp_kernel_op_69),_mm256_mul_pd(tmp_kernel_op_232,tmp_kernel_op_72)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_231,tmp_kernel_op_69),_mm256_mul_pd(tmp_kernel_op_232,tmp_kernel_o
p_72))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_231,tmp_kernel_op_80),_mm256_mul_pd(tmp_kernel_op_232,tmp_kernel_op_71)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_231,tmp_kernel_op_80),_mm256_mul_pd(tmp_kernel_op_232,tmp_kernel_op_71))))));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t tmp_kernel_op_2 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_kernel_op_3 = -tmp_kernel_op_2;
+                const real_t tmp_kernel_op_4 = p_affine_0_0 - p_affine_2_0;
+                const real_t tmp_kernel_op_5 = -tmp_kernel_op_4;
+                const real_t tmp_kernel_op_6 = p_affine_0_0 + tmp_kernel_op_3*0.33333333333333331 + tmp_kernel_op_5*0.33333333333333331;
+                const real_t tmp_kernel_op_7 = (tmp_kernel_op_6*tmp_kernel_op_6);
+                const real_t tmp_kernel_op_8 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_kernel_op_9 = -tmp_kernel_op_8;
+                const real_t tmp_kernel_op_10 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_kernel_op_11 = -tmp_kernel_op_10;
+                const real_t tmp_kernel_op_12 = p_affine_0_1 + tmp_kernel_op_11*0.33333333333333331 + tmp_kernel_op_9*0.33333333333333331;
+                const real_t tmp_kernel_op_13 = (tmp_kernel_op_12*tmp_kernel_op_12);
+                const real_t tmp_kernel_op_14 = tmp_kernel_op_13 + tmp_kernel_op_7;
+                const real_t tmp_kernel_op_22 = pow(tmp_kernel_op_14, -0.50000000000000000)*tmp_kernel_op_21;
+                const real_t tmp_kernel_op_23 = tmp_kernel_op_22*tmp_kernel_op_6;
+                const real_t tmp_kernel_op_24 = pow(tmp_kernel_op_14, -1.5000000000000000);
+                const real_t tmp_kernel_op_27 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_6) - tmp_kernel_op_17*(tmp_kernel_op_12 + tmp_kernel_op_25));
+                const real_t tmp_kernel_op_28 = tmp_kernel_op_24*tmp_kernel_op_27*1.0;
+                const real_t tmp_kernel_op_29 = tmp_kernel_op_1*tmp_kernel_op_23 + tmp_kernel_op_13*tmp_kernel_op_28;
+                const real_t tmp_kernel_op_30 = tmp_kernel_op_12*tmp_kernel_op_22;
+                const real_t tmp_kernel_op_31 = -tmp_kernel_op_17*tmp_kernel_op_30 + tmp_kernel_op_24*tmp_kernel_op_27*tmp_kernel_op_7*1.0;
+                const real_t tmp_kernel_op_32 = tmp_kernel_op_12*tmp_kernel_op_24*tmp_kernel_op_27*tmp_kernel_op_6*1.0 + tmp_kernel_op_17*tmp_kernel_op_23;
+                const real_t tmp_kernel_op_33 = tmp_kernel_op_1*tmp_kernel_op_30 - tmp_kernel_op_12*tmp_kernel_op_28*tmp_kernel_op_6;
+                const real_t tmp_kernel_op_34 = 1.0 / (tmp_kernel_op_29*tmp_kernel_op_31 + tmp_kernel_op_32*tmp_kernel_op_33);
+                const real_t tmp_kernel_op_38 = tmp_kernel_op_34*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_37 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_37);
+                const real_t tmp_kernel_op_39 = tmp_kernel_op_34*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_37 + jac_affine_inv_1_0_BLUE*tmp_kernel_op_37);
+                const real_t tmp_kernel_op_40 = -tmp_kernel_op_33;
+                const real_t tmp_kernel_op_41 = -p_affine_0_0;
+                const real_t tmp_kernel_op_42 = tmp_kernel_op_2*0.33333333333333331 + tmp_kernel_op_4*0.33333333333333331 + tmp_kernel_op_41;
+                const real_t tmp_kernel_op_43 = (tmp_kernel_op_42*tmp_kernel_op_42);
+                const real_t tmp_kernel_op_44 = -p_affine_0_1;
+                const real_t tmp_kernel_op_45 = tmp_kernel_op_10*0.33333333333333331 + tmp_kernel_op_44 + tmp_kernel_op_8*0.33333333333333331;
+                const real_t tmp_kernel_op_46 = (tmp_kernel_op_45*tmp_kernel_op_45);
+                const real_t tmp_kernel_op_47 = tmp_kernel_op_43 + tmp_kernel_op_46;
+                const real_t tmp_kernel_op_50 = pow(tmp_kernel_op_47, -0.50000000000000000)*tmp_kernel_op_49;
+                const real_t tmp_kernel_op_51 = tmp_kernel_op_42*tmp_kernel_op_50;
+                const real_t tmp_kernel_op_52 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_42) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_45);
+                const real_t tmp_kernel_op_53 = pow(tmp_kernel_op_47, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_54 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+                const real_t tmp_kernel_op_55 = tmp_kernel_op_45*tmp_kernel_op_50;
+                const real_t tmp_kernel_op_56 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+                const real_t tmp_kernel_op_57 = tmp_kernel_op_42*tmp_kernel_op_45;
+                const real_t tmp_kernel_op_58 = abs_det_jac_affine_BLUE*-0.28125*abs((tmp_kernel_op_0*tmp_kernel_op_51 - tmp_kernel_op_46*tmp_kernel_op_54)*(tmp_kernel_op_16*tmp_kernel_op_55 + tmp_kernel_op_43*tmp_kernel_op_56) - (tmp_kernel_op_0*tmp_kernel_op_55 + tmp_kernel_op_54*tmp_kernel_op_57)*(tmp_kernel_op_16*tmp_kernel_op_51 - tmp_kernel_op_56*tmp_kernel_op_57));
+                const real_t tmp_kernel_op_59 = p_affine_0_0 + tmp_kernel_op_3*0.20000000000000001 + tmp_kernel_op_5*0.59999999999999998;
+                const real_t tmp_kernel_op_60 = (tmp_kernel_op_59*tmp_kernel_op_59);
+                const real_t tmp_kernel_op_61 = p_affine_0_1 + tmp_kernel_op_11*0.59999999999999998 + tmp_kernel_op_9*0.20000000000000001;
+                const real_t tmp_kernel_op_62 = (tmp_kernel_op_61*tmp_kernel_op_61);
+                const real_t tmp_kernel_op_63 = tmp_kernel_op_60 + tmp_kernel_op_62;
+                const real_t tmp_kernel_op_64 = tmp_kernel_op_21*pow(tmp_kernel_op_63, -0.50000000000000000);
+                const real_t tmp_kernel_op_65 = tmp_kernel_op_59*tmp_kernel_op_64;
+                const real_t tmp_kernel_op_66 = pow(tmp_kernel_op_63, -1.5000000000000000);
+                const real_t tmp_kernel_op_67 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_59) - tmp_kernel_op_17*(tmp_kernel_op_25 + tmp_kernel_op_61));
+                const real_t tmp_kernel_op_68 = tmp_kernel_op_66*tmp_kernel_op_67*1.0;
+                const real_t tmp_kernel_op_69 = tmp_kernel_op_1*tmp_kernel_op_65 + tmp_kernel_op_62*tmp_kernel_op_68;
+                const real_t tmp_kernel_op_70 = tmp_kernel_op_61*tmp_kernel_op_64;
+                const real_t tmp_kernel_op_71 = -tmp_kernel_op_17*tmp_kernel_op_70 + tmp_kernel_op_60*tmp_kernel_op_66*tmp_kernel_op_67*1.0;
+                const real_t tmp_kernel_op_72 = tmp_kernel_op_17*tmp_kernel_op_65 + tmp_kernel_op_59*tmp_kernel_op_61*tmp_kernel_op_66*tmp_kernel_op_67*1.0;
+                const real_t tmp_kernel_op_73 = tmp_kernel_op_1*tmp_kernel_op_70 - tmp_kernel_op_59*tmp_kernel_op_61*tmp_kernel_op_68;
+                const real_t tmp_kernel_op_74 = 1.0 / (tmp_kernel_op_69*tmp_kernel_op_71 + tmp_kernel_op_72*tmp_kernel_op_73);
+                const real_t tmp_kernel_op_78 = tmp_kernel_op_74*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_77 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_77);
+                const real_t tmp_kernel_op_79 = tmp_kernel_op_74*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_77 + jac_affine_inv_1_0_BLUE*tmp_kernel_op_77);
+                const real_t tmp_kernel_op_80 = -tmp_kernel_op_73;
+                const real_t tmp_kernel_op_81 = tmp_kernel_op_2*0.20000000000000001 + tmp_kernel_op_4*0.59999999999999998 + tmp_kernel_op_41;
+                const real_t tmp_kernel_op_82 = (tmp_kernel_op_81*tmp_kernel_op_81);
+                const real_t tmp_kernel_op_83 = tmp_kernel_op_10*0.59999999999999998 + tmp_kernel_op_44 + tmp_kernel_op_8*0.20000000000000001;
+                const real_t tmp_kernel_op_84 = (tmp_kernel_op_83*tmp_kernel_op_83);
+                const real_t tmp_kernel_op_85 = tmp_kernel_op_82 + tmp_kernel_op_84;
+                const real_t tmp_kernel_op_86 = tmp_kernel_op_49*pow(tmp_kernel_op_85, -0.50000000000000000);
+                const real_t tmp_kernel_op_87 = tmp_kernel_op_81*tmp_kernel_op_86;
+                const real_t tmp_kernel_op_88 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_81) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_83);
+                const real_t tmp_kernel_op_89 = pow(tmp_kernel_op_85, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_90 = tmp_kernel_op_89*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_88);
+                const real_t tmp_kernel_op_91 = tmp_kernel_op_83*tmp_kernel_op_86;
+                const real_t tmp_kernel_op_92 = tmp_kernel_op_89*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_88);
+                const real_t tmp_kernel_op_93 = tmp_kernel_op_81*tmp_kernel_op_83;
+                const real_t tmp_kernel_op_94 = abs_det_jac_affine_BLUE*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_87 - tmp_kernel_op_84*tmp_kernel_op_90)*(tmp_kernel_op_16*tmp_kernel_op_91 + tmp_kernel_op_82*tmp_kernel_op_92) - (tmp_kernel_op_0*tmp_kernel_op_91 + tmp_kernel_op_90*tmp_kernel_op_93)*(tmp_kernel_op_16*tmp_kernel_op_87 - tmp_kernel_op_92*tmp_kernel_op_93));
+                const real_t tmp_kernel_op_95 = p_affine_0_0 + tmp_kernel_op_3*0.59999999999999998 + tmp_kernel_op_5*0.20000000000000001;
+                const real_t tmp_kernel_op_96 = (tmp_kernel_op_95*tmp_kernel_op_95);
+                const real_t tmp_kernel_op_97 = p_affine_0_1 + tmp_kernel_op_11*0.20000000000000001 + tmp_kernel_op_9*0.59999999999999998;
+                const real_t tmp_kernel_op_98 = (tmp_kernel_op_97*tmp_kernel_op_97);
+                const real_t tmp_kernel_op_99 = tmp_kernel_op_96 + tmp_kernel_op_98;
+                const real_t tmp_kernel_op_100 = tmp_kernel_op_21*pow(tmp_kernel_op_99, -0.50000000000000000);
+                const real_t tmp_kernel_op_101 = tmp_kernel_op_100*tmp_kernel_op_95;
+                const real_t tmp_kernel_op_102 = pow(tmp_kernel_op_99, -1.5000000000000000);
+                const real_t tmp_kernel_op_103 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_95) - tmp_kernel_op_17*(tmp_kernel_op_25 + tmp_kernel_op_97));
+                const real_t tmp_kernel_op_104 = tmp_kernel_op_102*tmp_kernel_op_103*1.0;
+                const real_t tmp_kernel_op_105 = tmp_kernel_op_1*tmp_kernel_op_101 + tmp_kernel_op_104*tmp_kernel_op_98;
+                const real_t tmp_kernel_op_106 = tmp_kernel_op_100*tmp_kernel_op_97;
+                const real_t tmp_kernel_op_107 = tmp_kernel_op_102*tmp_kernel_op_103*tmp_kernel_op_96*1.0 - tmp_kernel_op_106*tmp_kernel_op_17;
+                const real_t tmp_kernel_op_108 = tmp_kernel_op_101*tmp_kernel_op_17 + tmp_kernel_op_102*tmp_kernel_op_103*tmp_kernel_op_95*tmp_kernel_op_97*1.0;
+                const real_t tmp_kernel_op_109 = tmp_kernel_op_1*tmp_kernel_op_106 - tmp_kernel_op_104*tmp_kernel_op_95*tmp_kernel_op_97;
+                const real_t tmp_kernel_op_110 = 1.0 / (tmp_kernel_op_105*tmp_kernel_op_107 + tmp_kernel_op_108*tmp_kernel_op_109);
+                const real_t tmp_kernel_op_114 = tmp_kernel_op_110*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_113 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_113);
+                const real_t tmp_kernel_op_115 = tmp_kernel_op_110*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_113 + jac_affine_inv_1_0_BLUE*tmp_kernel_op_113);
+                const real_t tmp_kernel_op_116 = -tmp_kernel_op_109;
+                const real_t tmp_kernel_op_117 = tmp_kernel_op_2*0.59999999999999998 + tmp_kernel_op_4*0.20000000000000001 + tmp_kernel_op_41;
+                const real_t tmp_kernel_op_118 = (tmp_kernel_op_117*tmp_kernel_op_117);
+                const real_t tmp_kernel_op_119 = tmp_kernel_op_10*0.20000000000000001 + tmp_kernel_op_44 + tmp_kernel_op_8*0.59999999999999998;
+                const real_t tmp_kernel_op_120 = (tmp_kernel_op_119*tmp_kernel_op_119);
+                const real_t tmp_kernel_op_121 = tmp_kernel_op_118 + tmp_kernel_op_120;
+                const real_t tmp_kernel_op_122 = pow(tmp_kernel_op_121, -0.50000000000000000)*tmp_kernel_op_49;
+                const real_t tmp_kernel_op_123 = tmp_kernel_op_117*tmp_kernel_op_122;
+                const real_t tmp_kernel_op_124 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_117) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_119);
+                const real_t tmp_kernel_op_125 = pow(tmp_kernel_op_121, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_126 = tmp_kernel_op_125*(radRayVertex + tmp_kernel_op_124*tmp_kernel_op_48);
+                const real_t tmp_kernel_op_127 = tmp_kernel_op_119*tmp_kernel_op_122;
+                const real_t tmp_kernel_op_128 = tmp_kernel_op_125*(radRayVertex + tmp_kernel_op_124*tmp_kernel_op_48);
+                const real_t tmp_kernel_op_129 = tmp_kernel_op_117*tmp_kernel_op_119;
+                const real_t tmp_kernel_op_130 = abs_det_jac_affine_BLUE*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_123 - tmp_kernel_op_120*tmp_kernel_op_126)*(tmp_kernel_op_118*tmp_kernel_op_128 + tmp_kernel_op_127*tmp_kernel_op_16) - (tmp_kernel_op_0*tmp_kernel_op_127 + tmp_kernel_op_126*tmp_kernel_op_129)*(tmp_kernel_op_123*tmp_kernel_op_16 - tmp_kernel_op_128*tmp_kernel_op_129));
+                const real_t tmp_kernel_op_131 = p_affine_0_0 + tmp_kernel_op_3*0.20000000000000001 + tmp_kernel_op_5*0.20000000000000001;
+                const real_t tmp_kernel_op_132 = (tmp_kernel_op_131*tmp_kernel_op_131);
+                const real_t tmp_kernel_op_133 = p_affine_0_1 + tmp_kernel_op_11*0.20000000000000001 + tmp_kernel_op_9*0.20000000000000001;
+                const real_t tmp_kernel_op_134 = (tmp_kernel_op_133*tmp_kernel_op_133);
+                const real_t tmp_kernel_op_135 = tmp_kernel_op_132 + tmp_kernel_op_134;
+                const real_t tmp_kernel_op_136 = pow(tmp_kernel_op_135, -0.50000000000000000)*tmp_kernel_op_21;
+                const real_t tmp_kernel_op_137 = tmp_kernel_op_131*tmp_kernel_op_136;
+                const real_t tmp_kernel_op_138 = pow(tmp_kernel_op_135, -1.5000000000000000);
+                const real_t tmp_kernel_op_139 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_131 + tmp_kernel_op_26) - tmp_kernel_op_17*(tmp_kernel_op_133 + tmp_kernel_op_25));
+                const real_t tmp_kernel_op_140 = tmp_kernel_op_138*tmp_kernel_op_139*1.0;
+                const real_t tmp_kernel_op_141 = tmp_kernel_op_1*tmp_kernel_op_137 + tmp_kernel_op_134*tmp_kernel_op_140;
+                const real_t tmp_kernel_op_142 = tmp_kernel_op_133*tmp_kernel_op_136;
+                const real_t tmp_kernel_op_143 = tmp_kernel_op_132*tmp_kernel_op_138*tmp_kernel_op_139*1.0 - tmp_kernel_op_142*tmp_kernel_op_17;
+                const real_t tmp_kernel_op_144 = tmp_kernel_op_131*tmp_kernel_op_133*tmp_kernel_op_138*tmp_kernel_op_139*1.0 + tmp_kernel_op_137*tmp_kernel_op_17;
+                const real_t tmp_kernel_op_145 = tmp_kernel_op_1*tmp_kernel_op_142 - tmp_kernel_op_131*tmp_kernel_op_133*tmp_kernel_op_140;
+                const real_t tmp_kernel_op_146 = 1.0 / (tmp_kernel_op_141*tmp_kernel_op_143 + tmp_kernel_op_144*tmp_kernel_op_145);
+                const real_t tmp_kernel_op_150 = tmp_kernel_op_146*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_149 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_149);
+                const real_t tmp_kernel_op_151 = tmp_kernel_op_146*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_149 + jac_affine_inv_1_0_BLUE*tmp_kernel_op_149);
+                const real_t tmp_kernel_op_152 = -tmp_kernel_op_145;
+                const real_t tmp_kernel_op_153 = tmp_kernel_op_2*0.20000000000000001 + tmp_kernel_op_4*0.20000000000000001 + tmp_kernel_op_41;
+                const real_t tmp_kernel_op_154 = (tmp_kernel_op_153*tmp_kernel_op_153);
+                const real_t tmp_kernel_op_155 = tmp_kernel_op_10*0.20000000000000001 + tmp_kernel_op_44 + tmp_kernel_op_8*0.20000000000000001;
+                const real_t tmp_kernel_op_156 = (tmp_kernel_op_155*tmp_kernel_op_155);
+                const real_t tmp_kernel_op_157 = tmp_kernel_op_154 + tmp_kernel_op_156;
+                const real_t tmp_kernel_op_158 = pow(tmp_kernel_op_157, -0.50000000000000000)*tmp_kernel_op_49;
+                const real_t tmp_kernel_op_159 = tmp_kernel_op_153*tmp_kernel_op_158;
+                const real_t tmp_kernel_op_160 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_153) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_155);
+                const real_t tmp_kernel_op_161 = pow(tmp_kernel_op_157, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_162 = tmp_kernel_op_161*(radRayVertex + tmp_kernel_op_160*tmp_kernel_op_48);
+                const real_t tmp_kernel_op_163 = tmp_kernel_op_155*tmp_kernel_op_158;
+                const real_t tmp_kernel_op_164 = tmp_kernel_op_161*(radRayVertex + tmp_kernel_op_160*tmp_kernel_op_48);
+                const real_t tmp_kernel_op_165 = tmp_kernel_op_153*tmp_kernel_op_155;
+                const real_t tmp_kernel_op_166 = abs_det_jac_affine_BLUE*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_159 - tmp_kernel_op_156*tmp_kernel_op_162)*(tmp_kernel_op_154*tmp_kernel_op_164 + tmp_kernel_op_16*tmp_kernel_op_163) - (tmp_kernel_op_0*tmp_kernel_op_163 + tmp_kernel_op_162*tmp_kernel_op_165)*(tmp_kernel_op_159*tmp_kernel_op_16 - tmp_kernel_op_164*tmp_kernel_op_165));
+                const real_t tmp_kernel_op_167 = tmp_kernel_op_34*(tmp_kernel_op_35 - 1.0);
+                const real_t tmp_kernel_op_168 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_167;
+                const real_t tmp_kernel_op_169 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_167;
+                const real_t tmp_kernel_op_170 = tmp_kernel_op_74*(tmp_kernel_op_75 - 1.0);
+                const real_t tmp_kernel_op_171 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_170;
+                const real_t tmp_kernel_op_172 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_170;
+                const real_t tmp_kernel_op_173 = tmp_kernel_op_110*(tmp_kernel_op_111 - 1.0);
+                const real_t tmp_kernel_op_174 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_173;
+                const real_t tmp_kernel_op_175 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_173;
+                const real_t tmp_kernel_op_176 = tmp_kernel_op_146*(tmp_kernel_op_147 - 1.0);
+                const real_t tmp_kernel_op_177 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_176;
+                const real_t tmp_kernel_op_178 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_176;
+                const real_t tmp_kernel_op_179 = tmp_kernel_op_34*(tmp_kernel_op_36 - 1.0);
+                const real_t tmp_kernel_op_180 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_179;
+                const real_t tmp_kernel_op_181 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_179;
+                const real_t tmp_kernel_op_182 = tmp_kernel_op_74*(tmp_kernel_op_76 - 1.0);
+                const real_t tmp_kernel_op_183 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_182;
+                const real_t tmp_kernel_op_184 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_182;
+                const real_t tmp_kernel_op_185 = tmp_kernel_op_110*(tmp_kernel_op_112 - 1.0);
+                const real_t tmp_kernel_op_186 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_185;
+                const real_t tmp_kernel_op_187 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_185;
+                const real_t tmp_kernel_op_188 = tmp_kernel_op_146*(tmp_kernel_op_148 - 1.0);
+                const real_t tmp_kernel_op_189 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_188;
+                const real_t tmp_kernel_op_190 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_188;
+                const real_t tmp_kernel_op_193 = tmp_kernel_op_34*(tmp_moved_constant_0 + tmp_moved_constant_1);
+                const real_t tmp_kernel_op_196 = tmp_kernel_op_34*(tmp_moved_constant_2 + tmp_moved_constant_3);
+                const real_t tmp_kernel_op_199 = tmp_kernel_op_74*(tmp_moved_constant_4 + tmp_moved_constant_5);
+                const real_t tmp_kernel_op_202 = tmp_kernel_op_74*(tmp_moved_constant_6 + tmp_moved_constant_7);
+                const real_t tmp_kernel_op_205 = tmp_kernel_op_110*(tmp_moved_constant_8 + tmp_moved_constant_9);
+                const real_t tmp_kernel_op_208 = tmp_kernel_op_110*(tmp_moved_constant_10 + tmp_moved_constant_11);
+                const real_t tmp_kernel_op_211 = tmp_kernel_op_146*(tmp_moved_constant_12 + tmp_moved_constant_13);
+                const real_t tmp_kernel_op_214 = tmp_kernel_op_146*(tmp_moved_constant_14 + tmp_moved_constant_15);
+                const real_t tmp_kernel_op_216 = tmp_kernel_op_34*(jac_affine_inv_1_1_BLUE*tmp_kernel_op_215 - tmp_moved_constant_0);
+                const real_t tmp_kernel_op_217 = tmp_kernel_op_34*(jac_affine_inv_1_0_BLUE*tmp_kernel_op_215 - tmp_moved_constant_2);
+                const real_t tmp_kernel_op_219 = tmp_kernel_op_74*(jac_affine_inv_1_1_BLUE*tmp_kernel_op_218 - tmp_moved_constant_4);
+                const real_t tmp_kernel_op_220 = tmp_kernel_op_74*(jac_affine_inv_1_0_BLUE*tmp_kernel_op_218 - tmp_moved_constant_6);
+                const real_t tmp_kernel_op_222 = tmp_kernel_op_110*(jac_affine_inv_1_1_BLUE*tmp_kernel_op_221 - tmp_moved_constant_8);
+                const real_t tmp_kernel_op_223 = tmp_kernel_op_110*(jac_affine_inv_1_0_BLUE*tmp_kernel_op_221 - tmp_moved_constant_10);
+                const real_t tmp_kernel_op_225 = tmp_kernel_op_146*(jac_affine_inv_1_1_BLUE*tmp_kernel_op_224 - tmp_moved_constant_12);
+                const real_t tmp_kernel_op_226 = tmp_kernel_op_146*(jac_affine_inv_1_0_BLUE*tmp_kernel_op_224 - tmp_moved_constant_14);
+                const real_t tmp_kernel_op_228 = tmp_kernel_op_34*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_227 - tmp_moved_constant_1);
+                const real_t tmp_kernel_op_229 = tmp_kernel_op_34*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_227 - tmp_moved_constant_3);
+                const real_t tmp_kernel_op_231 = tmp_kernel_op_74*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_230 - tmp_moved_constant_5);
+                const real_t tmp_kernel_op_232 = tmp_kernel_op_74*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_230 - tmp_moved_constant_7);
+                const real_t tmp_kernel_op_234 = tmp_kernel_op_110*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_233 - tmp_moved_constant_9);
+                const real_t tmp_kernel_op_235 = tmp_kernel_op_110*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_233 - tmp_moved_constant_11);
+                const real_t tmp_kernel_op_237 = tmp_kernel_op_146*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_236 - tmp_moved_constant_13);
+                const real_t tmp_kernel_op_238 = tmp_kernel_op_146*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_236 - tmp_moved_constant_15);
+                const real_t elMatDiag_0 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_114 + tmp_kernel_op_108*tmp_kernel_op_115)*(tmp_kernel_op_105*tmp_kernel_op_114 + tmp_kernel_op_108*tmp_kernel_op_115)) + ((tmp_kernel_op_107*tmp_kernel_op_115 + tmp_kernel_op_114*tmp_kernel_op_116)*(tmp_kernel_op_107*tmp_kernel_op_115 + tmp_kernel_op_114*tmp_kernel_op_116))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_150 + tmp_kernel_op_144*tmp_kernel_op_151)*(tmp_kernel_op_141*tmp_kernel_op_150 + tmp_kernel_op_144*tmp_kernel_op_151)) + ((tmp_kernel_op_143*tmp_kernel_op_151 + tmp_kernel_op_150*tmp_kernel_op_152)*(tmp_kernel_op_143*tmp_kernel_op_151 + tmp_kernel_op_150*tmp_kernel_op_152))) + tmp_kernel_op_58*(((tmp_kernel_op_29*tmp_kernel_op_38 + tmp_kernel_op_32*tmp_kernel_op_39)*(tmp_kernel_op_29*tmp_kernel_op_38 + tmp_kernel_op_32*tmp_kernel_op_39)) + ((tmp_kernel_op_31*tmp_kernel_op_39 + tmp_kernel_op_38*tmp_kernel_op_40)*(tmp_kernel_op_31*tmp_kernel_op_39 + tmp_kernel_op_38*tmp_kernel_op_40))) + tmp_kernel_op_94*(((tmp_kernel_op_69*tmp_kernel_op_78 + tmp_kernel_op_72*tmp_kernel_op_79)*(tmp_kernel_op_69*tmp_kernel_op_78 + tmp_kernel_op_72*tmp_kernel_op_79)) + ((tmp_kernel_op_71*tmp_kernel_op_79 + tmp_kernel_op_78*tmp_kernel_op_80)*(tmp_kernel_op_71*tmp_kernel_op_79 + tmp_kernel_op_78*tmp_kernel_op_80)));
+                const real_t elMatDiag_1 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_174 + tmp_kernel_op_108*tmp_kernel_op_175)*(tmp_kernel_op_105*tmp_kernel_op_174 + tmp_kernel_op_108*tmp_kernel_op_175)) + ((tmp_kernel_op_107*tmp_kernel_op_175 + tmp_kernel_op_116*tmp_kernel_op_174)*(tmp_kernel_op_107*tmp_kernel_op_175 + tmp_kernel_op_116*tmp_kernel_op_174))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_177 + tmp_kernel_op_144*tmp_kernel_op_178)*(tmp_kernel_op_141*tmp_kernel_op_177 + tmp_kernel_op_144*tmp_kernel_op_178)) + ((tmp_kernel_op_143*tmp_kernel_op_178 + tmp_kernel_op_152*tmp_kernel_op_177)*(tmp_kernel_op_143*tmp_kernel_op_178 + tmp_kernel_op_152*tmp_kernel_op_177))) + tmp_kernel_op_58*(((tmp_kernel_op_168*tmp_kernel_op_29 + tmp_kernel_op_169*tmp_kernel_op_32)*(tmp_kernel_op_168*tmp_kernel_op_29 + tmp_kernel_op_169*tmp_kernel_op_32)) + ((tmp_kernel_op_168*tmp_kernel_op_40 + tmp_kernel_op_169*tmp_kernel_op_31)*(tmp_kernel_op_168*tmp_kernel_op_40 + tmp_kernel_op_169*tmp_kernel_op_31))) + tmp_kernel_op_94*(((tmp_kernel_op_171*tmp_kernel_op_69 + tmp_kernel_op_172*tmp_kernel_op_72)*(tmp_kernel_op_171*tmp_kernel_op_69 + tmp_kernel_op_172*tmp_kernel_op_72)) + ((tmp_kernel_op_171*tmp_kernel_op_80 + tmp_kernel_op_172*tmp_kernel_op_71)*(tmp_kernel_op_171*tmp_kernel_op_80 + tmp_kernel_op_172*tmp_kernel_op_71)));
+                const real_t elMatDiag_2 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_186 + tmp_kernel_op_108*tmp_kernel_op_187)*(tmp_kernel_op_105*tmp_kernel_op_186 + tmp_kernel_op_108*tmp_kernel_op_187)) + ((tmp_kernel_op_107*tmp_kernel_op_187 + tmp_kernel_op_116*tmp_kernel_op_186)*(tmp_kernel_op_107*tmp_kernel_op_187 + tmp_kernel_op_116*tmp_kernel_op_186))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_189 + tmp_kernel_op_144*tmp_kernel_op_190)*(tmp_kernel_op_141*tmp_kernel_op_189 + tmp_kernel_op_144*tmp_kernel_op_190)) + ((tmp_kernel_op_143*tmp_kernel_op_190 + tmp_kernel_op_152*tmp_kernel_op_189)*(tmp_kernel_op_143*tmp_kernel_op_190 + tmp_kernel_op_152*tmp_kernel_op_189))) + tmp_kernel_op_58*(((tmp_kernel_op_180*tmp_kernel_op_29 + tmp_kernel_op_181*tmp_kernel_op_32)*(tmp_kernel_op_180*tmp_kernel_op_29 + tmp_kernel_op_181*tmp_kernel_op_32)) + ((tmp_kernel_op_180*tmp_kernel_op_40 + tmp_kernel_op_181*tmp_kernel_op_31)*(tmp_kernel_op_180*tmp_kernel_op_40 + tmp_kernel_op_181*tmp_kernel_op_31))) + tmp_kernel_op_94*(((tmp_kernel_op_183*tmp_kernel_op_69 + tmp_kernel_op_184*tmp_kernel_op_72)*(tmp_kernel_op_183*tmp_kernel_op_69 + tmp_kernel_op_184*tmp_kernel_op_72)) + ((tmp_kernel_op_183*tmp_kernel_op_80 + tmp_kernel_op_184*tmp_kernel_op_71)*(tmp_kernel_op_183*tmp_kernel_op_80 + tmp_kernel_op_184*tmp_kernel_op_71)));
+                const real_t elMatDiag_3 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_205 + tmp_kernel_op_108*tmp_kernel_op_208)*(tmp_kernel_op_105*tmp_kernel_op_205 + tmp_kernel_op_108*tmp_kernel_op_208)) + ((tmp_kernel_op_107*tmp_kernel_op_208 + tmp_kernel_op_116*tmp_kernel_op_205)*(tmp_kernel_op_107*tmp_kernel_op_208 + tmp_kernel_op_116*tmp_kernel_op_205))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_211 + tmp_kernel_op_144*tmp_kernel_op_214)*(tmp_kernel_op_141*tmp_kernel_op_211 + tmp_kernel_op_144*tmp_kernel_op_214)) + ((tmp_kernel_op_143*tmp_kernel_op_214 + tmp_kernel_op_152*tmp_kernel_op_211)*(tmp_kernel_op_143*tmp_kernel_op_214 + tmp_kernel_op_152*tmp_kernel_op_211))) + tmp_kernel_op_58*(((tmp_kernel_op_193*tmp_kernel_op_29 + tmp_kernel_op_196*tmp_kernel_op_32)*(tmp_kernel_op_193*tmp_kernel_op_29 + tmp_kernel_op_196*tmp_kernel_op_32)) + ((tmp_kernel_op_193*tmp_kernel_op_40 + tmp_kernel_op_196*tmp_kernel_op_31)*(tmp_kernel_op_193*tmp_kernel_op_40 + tmp_kernel_op_196*tmp_kernel_op_31))) + tmp_kernel_op_94*(((tmp_kernel_op_199*tmp_kernel_op_69 + tmp_kernel_op_202*tmp_kernel_op_72)*(tmp_kernel_op_199*tmp_kernel_op_69 + tmp_kernel_op_202*tmp_kernel_op_72)) + ((tmp_kernel_op_199*tmp_kernel_op_80 + tmp_kernel_op_202*tmp_kernel_op_71)*(tmp_kernel_op_199*tmp_kernel_op_80 + tmp_kernel_op_202*tmp_kernel_op_71)));
+                const real_t elMatDiag_4 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_222 + tmp_kernel_op_108*tmp_kernel_op_223)*(tmp_kernel_op_105*tmp_kernel_op_222 + tmp_kernel_op_108*tmp_kernel_op_223)) + ((tmp_kernel_op_107*tmp_kernel_op_223 + tmp_kernel_op_116*tmp_kernel_op_222)*(tmp_kernel_op_107*tmp_kernel_op_223 + tmp_kernel_op_116*tmp_kernel_op_222))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_225 + tmp_kernel_op_144*tmp_kernel_op_226)*(tmp_kernel_op_141*tmp_kernel_op_225 + tmp_kernel_op_144*tmp_kernel_op_226)) + ((tmp_kernel_op_143*tmp_kernel_op_226 + tmp_kernel_op_152*tmp_kernel_op_225)*(tmp_kernel_op_143*tmp_kernel_op_226 + tmp_kernel_op_152*tmp_kernel_op_225))) + tmp_kernel_op_58*(((tmp_kernel_op_216*tmp_kernel_op_29 + tmp_kernel_op_217*tmp_kernel_op_32)*(tmp_kernel_op_216*tmp_kernel_op_29 + tmp_kernel_op_217*tmp_kernel_op_32)) + ((tmp_kernel_op_216*tmp_kernel_op_40 + tmp_kernel_op_217*tmp_kernel_op_31)*(tmp_kernel_op_216*tmp_kernel_op_40 + tmp_kernel_op_217*tmp_kernel_op_31))) + tmp_kernel_op_94*(((tmp_kernel_op_219*tmp_kernel_op_69 + tmp_kernel_op_220*tmp_kernel_op_72)*(tmp_kernel_op_219*tmp_kernel_op_69 + tmp_kernel_op_220*tmp_kernel_op_72)) + ((tmp_kernel_op_219*tmp_kernel_op_80 + tmp_kernel_op_220*tmp_kernel_op_71)*(tmp_kernel_op_219*tmp_kernel_op_80 + tmp_kernel_op_220*tmp_kernel_op_71)));
+                const real_t elMatDiag_5 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_234 + tmp_kernel_op_108*tmp_kernel_op_235)*(tmp_kernel_op_105*tmp_kernel_op_234 + tmp_kernel_op_108*tmp_kernel_op_235)) + ((tmp_kernel_op_107*tmp_kernel_op_235 + tmp_kernel_op_116*tmp_kernel_op_234)*(tmp_kernel_op_107*tmp_kernel_op_235 + tmp_kernel_op_116*tmp_kernel_op_234))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_237 + tmp_kernel_op_144*tmp_kernel_op_238)*(tmp_kernel_op_141*tmp_kernel_op_237 + tmp_kernel_op_144*tmp_kernel_op_238)) + ((tmp_kernel_op_143*tmp_kernel_op_238 + tmp_kernel_op_152*tmp_kernel_op_237)*(tmp_kernel_op_143*tmp_kernel_op_238 + tmp_kernel_op_152*tmp_kernel_op_237))) + tmp_kernel_op_58*(((tmp_kernel_op_228*tmp_kernel_op_29 + tmp_kernel_op_229*tmp_kernel_op_32)*(tmp_kernel_op_228*tmp_kernel_op_29 + tmp_kernel_op_229*tmp_kernel_op_32)) + ((tmp_kernel_op_228*tmp_kernel_op_40 + tmp_kernel_op_229*tmp_kernel_op_31)*(tmp_kernel_op_228*tmp_kernel_op_40 + tmp_kernel_op_229*tmp_kernel_op_31))) + tmp_kernel_op_94*(((tmp_kernel_op_231*tmp_kernel_op_69 + tmp_kernel_op_232*tmp_kernel_op_72)*(tmp_kernel_op_231*tmp_kernel_op_69 + tmp_kernel_op_232*tmp_kernel_op_72)) + ((tmp_kernel_op_231*tmp_kernel_op_80 + tmp_kernel_op_232*tmp_kernel_op_71)*(tmp_kernel_op_231*tmp_kernel_op_80 + tmp_kernel_op_232*tmp_kernel_op_71)));
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3d85930808269233303c0eff38f27129b16d0441
--- /dev/null
+++ b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp
@@ -0,0 +1,767 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseDiffusionAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseDiffusionAnnulusMap::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_kernel_op_0 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_kernel_op_1 = -tmp_kernel_op_0;
+       const real_t tmp_kernel_op_15 = rayVertex_0 - refVertex_0;
+       const real_t tmp_kernel_op_16 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_kernel_op_17 = -tmp_kernel_op_16;
+       const real_t tmp_kernel_op_18 = rayVertex_1 - refVertex_1;
+       const real_t tmp_kernel_op_19 = radRayVertex - radRefVertex;
+       const real_t tmp_kernel_op_20 = -tmp_kernel_op_19*1.0 / (-tmp_kernel_op_1*tmp_kernel_op_15 + tmp_kernel_op_17*tmp_kernel_op_18);
+       const real_t tmp_kernel_op_21 = tmp_kernel_op_20*1.0;
+       const real_t tmp_kernel_op_25 = -rayVertex_1;
+       const real_t tmp_kernel_op_26 = -rayVertex_0;
+       const real_t tmp_kernel_op_35 = 1.3333333333333333;
+       const real_t tmp_kernel_op_36 = 1.3333333333333333;
+       const real_t tmp_kernel_op_37 = tmp_kernel_op_35 + tmp_kernel_op_36 - 3.0;
+       const real_t tmp_kernel_op_50 = tmp_kernel_op_19*1.0 / (tmp_kernel_op_0*tmp_kernel_op_15 - tmp_kernel_op_16*tmp_kernel_op_18);
+       const real_t tmp_kernel_op_51 = tmp_kernel_op_50*1.0;
+       const real_t tmp_kernel_op_77 = 0.80000000000000004;
+       const real_t tmp_kernel_op_78 = 2.3999999999999999;
+       const real_t tmp_kernel_op_79 = tmp_kernel_op_77 + tmp_kernel_op_78 - 3.0;
+       const real_t tmp_kernel_op_115 = 2.3999999999999999;
+       const real_t tmp_kernel_op_116 = 0.80000000000000004;
+       const real_t tmp_kernel_op_117 = tmp_kernel_op_115 + tmp_kernel_op_116 - 3.0;
+       const real_t tmp_kernel_op_153 = 0.80000000000000004;
+       const real_t tmp_kernel_op_154 = 0.80000000000000004;
+       const real_t tmp_kernel_op_155 = tmp_kernel_op_153 + tmp_kernel_op_154 - 3.0;
+       const real_t tmp_kernel_op_217 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_36;
+       const real_t tmp_kernel_op_218 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_35;
+       const real_t tmp_kernel_op_220 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_36;
+       const real_t tmp_kernel_op_221 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_35;
+       const real_t tmp_kernel_op_225 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_78;
+       const real_t tmp_kernel_op_226 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_77;
+       const real_t tmp_kernel_op_228 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_78;
+       const real_t tmp_kernel_op_229 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_77;
+       const real_t tmp_kernel_op_233 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_116;
+       const real_t tmp_kernel_op_234 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_115;
+       const real_t tmp_kernel_op_236 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_116;
+       const real_t tmp_kernel_op_237 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_115;
+       const real_t tmp_kernel_op_241 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_154;
+       const real_t tmp_kernel_op_242 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_153;
+       const real_t tmp_kernel_op_244 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_154;
+       const real_t tmp_kernel_op_245 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_153;
+       const real_t tmp_kernel_op_250 = -tmp_kernel_op_35 + 1.3333333333333335;
+       const real_t tmp_kernel_op_255 = -tmp_kernel_op_77 - 0.79999999999999982;
+       const real_t tmp_kernel_op_260 = -tmp_kernel_op_115 + 2.3999999999999999;
+       const real_t tmp_kernel_op_265 = -tmp_kernel_op_153 + 2.3999999999999999;
+       const real_t tmp_kernel_op_271 = -tmp_kernel_op_36 + 1.3333333333333335;
+       const real_t tmp_kernel_op_276 = -tmp_kernel_op_78 + 2.3999999999999999;
+       const real_t tmp_kernel_op_281 = -tmp_kernel_op_116 - 0.79999999999999982;
+       const real_t tmp_kernel_op_286 = -tmp_kernel_op_154 + 2.3999999999999999;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t tmp_kernel_op_2 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_kernel_op_3 = -tmp_kernel_op_2;
+             const real_t tmp_kernel_op_4 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_kernel_op_5 = -tmp_kernel_op_4;
+             const real_t tmp_kernel_op_6 = p_affine_0_0 + tmp_kernel_op_3*0.33333333333333331 + tmp_kernel_op_5*0.33333333333333331;
+             const real_t tmp_kernel_op_7 = (tmp_kernel_op_6*tmp_kernel_op_6);
+             const real_t tmp_kernel_op_8 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_kernel_op_9 = -tmp_kernel_op_8;
+             const real_t tmp_kernel_op_10 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_kernel_op_11 = -tmp_kernel_op_10;
+             const real_t tmp_kernel_op_12 = p_affine_0_1 + tmp_kernel_op_11*0.33333333333333331 + tmp_kernel_op_9*0.33333333333333331;
+             const real_t tmp_kernel_op_13 = (tmp_kernel_op_12*tmp_kernel_op_12);
+             const real_t tmp_kernel_op_14 = tmp_kernel_op_13 + tmp_kernel_op_7;
+             const real_t tmp_kernel_op_22 = pow(tmp_kernel_op_14, -0.50000000000000000)*tmp_kernel_op_21;
+             const real_t tmp_kernel_op_23 = tmp_kernel_op_22*tmp_kernel_op_6;
+             const real_t tmp_kernel_op_24 = pow(tmp_kernel_op_14, -1.5000000000000000);
+             const real_t tmp_kernel_op_27 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_6) - tmp_kernel_op_17*(tmp_kernel_op_12 + tmp_kernel_op_25));
+             const real_t tmp_kernel_op_28 = tmp_kernel_op_24*tmp_kernel_op_27*1.0;
+             const real_t tmp_kernel_op_29 = tmp_kernel_op_1*tmp_kernel_op_23 + tmp_kernel_op_13*tmp_kernel_op_28;
+             const real_t tmp_kernel_op_30 = tmp_kernel_op_12*tmp_kernel_op_22;
+             const real_t tmp_kernel_op_31 = -tmp_kernel_op_17*tmp_kernel_op_30 + tmp_kernel_op_24*tmp_kernel_op_27*tmp_kernel_op_7*1.0;
+             const real_t tmp_kernel_op_32 = tmp_kernel_op_12*tmp_kernel_op_24*tmp_kernel_op_27*tmp_kernel_op_6*1.0 + tmp_kernel_op_17*tmp_kernel_op_23;
+             const real_t tmp_kernel_op_33 = tmp_kernel_op_1*tmp_kernel_op_30 - tmp_kernel_op_12*tmp_kernel_op_28*tmp_kernel_op_6;
+             const real_t tmp_kernel_op_34 = 1.0 / (tmp_kernel_op_29*tmp_kernel_op_31 + tmp_kernel_op_32*tmp_kernel_op_33);
+             const real_t tmp_kernel_op_38 = tmp_kernel_op_34*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_37 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_37);
+             const real_t tmp_kernel_op_39 = tmp_kernel_op_34*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_37 + jac_affine_inv_1_0_GRAY*tmp_kernel_op_37);
+             const real_t tmp_kernel_op_40 = tmp_kernel_op_29*tmp_kernel_op_38 + tmp_kernel_op_32*tmp_kernel_op_39;
+             const real_t tmp_kernel_op_41 = -tmp_kernel_op_33;
+             const real_t tmp_kernel_op_42 = tmp_kernel_op_31*tmp_kernel_op_39 + tmp_kernel_op_38*tmp_kernel_op_41;
+             const real_t tmp_kernel_op_43 = -p_affine_0_0;
+             const real_t tmp_kernel_op_44 = tmp_kernel_op_2*0.33333333333333331 + tmp_kernel_op_4*0.33333333333333331 + tmp_kernel_op_43;
+             const real_t tmp_kernel_op_45 = (tmp_kernel_op_44*tmp_kernel_op_44);
+             const real_t tmp_kernel_op_46 = -p_affine_0_1;
+             const real_t tmp_kernel_op_47 = tmp_kernel_op_10*0.33333333333333331 + tmp_kernel_op_46 + tmp_kernel_op_8*0.33333333333333331;
+             const real_t tmp_kernel_op_48 = (tmp_kernel_op_47*tmp_kernel_op_47);
+             const real_t tmp_kernel_op_49 = tmp_kernel_op_45 + tmp_kernel_op_48;
+             const real_t tmp_kernel_op_52 = pow(tmp_kernel_op_49, -0.50000000000000000)*tmp_kernel_op_51;
+             const real_t tmp_kernel_op_53 = tmp_kernel_op_44*tmp_kernel_op_52;
+             const real_t tmp_kernel_op_54 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_44) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_47);
+             const real_t tmp_kernel_op_55 = pow(tmp_kernel_op_49, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_56 = tmp_kernel_op_55*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_54);
+             const real_t tmp_kernel_op_57 = tmp_kernel_op_47*tmp_kernel_op_52;
+             const real_t tmp_kernel_op_58 = tmp_kernel_op_55*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_54);
+             const real_t tmp_kernel_op_59 = tmp_kernel_op_44*tmp_kernel_op_47;
+             const real_t tmp_kernel_op_60 = abs_det_jac_affine_GRAY*-0.28125*abs((tmp_kernel_op_0*tmp_kernel_op_53 - tmp_kernel_op_48*tmp_kernel_op_56)*(tmp_kernel_op_16*tmp_kernel_op_57 + tmp_kernel_op_45*tmp_kernel_op_58) - (tmp_kernel_op_0*tmp_kernel_op_57 + tmp_kernel_op_56*tmp_kernel_op_59)*(tmp_kernel_op_16*tmp_kernel_op_53 - tmp_kernel_op_58*tmp_kernel_op_59));
+             const real_t tmp_kernel_op_61 = p_affine_0_0 + tmp_kernel_op_3*0.20000000000000001 + tmp_kernel_op_5*0.59999999999999998;
+             const real_t tmp_kernel_op_62 = (tmp_kernel_op_61*tmp_kernel_op_61);
+             const real_t tmp_kernel_op_63 = p_affine_0_1 + tmp_kernel_op_11*0.59999999999999998 + tmp_kernel_op_9*0.20000000000000001;
+             const real_t tmp_kernel_op_64 = (tmp_kernel_op_63*tmp_kernel_op_63);
+             const real_t tmp_kernel_op_65 = tmp_kernel_op_62 + tmp_kernel_op_64;
+             const real_t tmp_kernel_op_66 = tmp_kernel_op_21*pow(tmp_kernel_op_65, -0.50000000000000000);
+             const real_t tmp_kernel_op_67 = tmp_kernel_op_61*tmp_kernel_op_66;
+             const real_t tmp_kernel_op_68 = pow(tmp_kernel_op_65, -1.5000000000000000);
+             const real_t tmp_kernel_op_69 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_61) - tmp_kernel_op_17*(tmp_kernel_op_25 + tmp_kernel_op_63));
+             const real_t tmp_kernel_op_70 = tmp_kernel_op_68*tmp_kernel_op_69*1.0;
+             const real_t tmp_kernel_op_71 = tmp_kernel_op_1*tmp_kernel_op_67 + tmp_kernel_op_64*tmp_kernel_op_70;
+             const real_t tmp_kernel_op_72 = tmp_kernel_op_63*tmp_kernel_op_66;
+             const real_t tmp_kernel_op_73 = -tmp_kernel_op_17*tmp_kernel_op_72 + tmp_kernel_op_62*tmp_kernel_op_68*tmp_kernel_op_69*1.0;
+             const real_t tmp_kernel_op_74 = tmp_kernel_op_17*tmp_kernel_op_67 + tmp_kernel_op_61*tmp_kernel_op_63*tmp_kernel_op_68*tmp_kernel_op_69*1.0;
+             const real_t tmp_kernel_op_75 = tmp_kernel_op_1*tmp_kernel_op_72 - tmp_kernel_op_61*tmp_kernel_op_63*tmp_kernel_op_70;
+             const real_t tmp_kernel_op_76 = 1.0 / (tmp_kernel_op_71*tmp_kernel_op_73 + tmp_kernel_op_74*tmp_kernel_op_75);
+             const real_t tmp_kernel_op_80 = tmp_kernel_op_76*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_79 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_79);
+             const real_t tmp_kernel_op_81 = tmp_kernel_op_76*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_79 + jac_affine_inv_1_0_GRAY*tmp_kernel_op_79);
+             const real_t tmp_kernel_op_82 = tmp_kernel_op_71*tmp_kernel_op_80 + tmp_kernel_op_74*tmp_kernel_op_81;
+             const real_t tmp_kernel_op_83 = -tmp_kernel_op_75;
+             const real_t tmp_kernel_op_84 = tmp_kernel_op_73*tmp_kernel_op_81 + tmp_kernel_op_80*tmp_kernel_op_83;
+             const real_t tmp_kernel_op_85 = tmp_kernel_op_2*0.20000000000000001 + tmp_kernel_op_4*0.59999999999999998 + tmp_kernel_op_43;
+             const real_t tmp_kernel_op_86 = (tmp_kernel_op_85*tmp_kernel_op_85);
+             const real_t tmp_kernel_op_87 = tmp_kernel_op_10*0.59999999999999998 + tmp_kernel_op_46 + tmp_kernel_op_8*0.20000000000000001;
+             const real_t tmp_kernel_op_88 = (tmp_kernel_op_87*tmp_kernel_op_87);
+             const real_t tmp_kernel_op_89 = tmp_kernel_op_86 + tmp_kernel_op_88;
+             const real_t tmp_kernel_op_90 = tmp_kernel_op_51*pow(tmp_kernel_op_89, -0.50000000000000000);
+             const real_t tmp_kernel_op_91 = tmp_kernel_op_85*tmp_kernel_op_90;
+             const real_t tmp_kernel_op_92 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_85) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_87);
+             const real_t tmp_kernel_op_93 = pow(tmp_kernel_op_89, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_94 = tmp_kernel_op_93*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_92);
+             const real_t tmp_kernel_op_95 = tmp_kernel_op_87*tmp_kernel_op_90;
+             const real_t tmp_kernel_op_96 = tmp_kernel_op_93*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_92);
+             const real_t tmp_kernel_op_97 = tmp_kernel_op_85*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_98 = abs_det_jac_affine_GRAY*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_91 - tmp_kernel_op_88*tmp_kernel_op_94)*(tmp_kernel_op_16*tmp_kernel_op_95 + tmp_kernel_op_86*tmp_kernel_op_96) - (tmp_kernel_op_0*tmp_kernel_op_95 + tmp_kernel_op_94*tmp_kernel_op_97)*(tmp_kernel_op_16*tmp_kernel_op_91 - tmp_kernel_op_96*tmp_kernel_op_97));
+             const real_t tmp_kernel_op_99 = p_affine_0_0 + tmp_kernel_op_3*0.59999999999999998 + tmp_kernel_op_5*0.20000000000000001;
+             const real_t tmp_kernel_op_100 = (tmp_kernel_op_99*tmp_kernel_op_99);
+             const real_t tmp_kernel_op_101 = p_affine_0_1 + tmp_kernel_op_11*0.20000000000000001 + tmp_kernel_op_9*0.59999999999999998;
+             const real_t tmp_kernel_op_102 = (tmp_kernel_op_101*tmp_kernel_op_101);
+             const real_t tmp_kernel_op_103 = tmp_kernel_op_100 + tmp_kernel_op_102;
+             const real_t tmp_kernel_op_104 = pow(tmp_kernel_op_103, -0.50000000000000000)*tmp_kernel_op_21;
+             const real_t tmp_kernel_op_105 = tmp_kernel_op_104*tmp_kernel_op_99;
+             const real_t tmp_kernel_op_106 = pow(tmp_kernel_op_103, -1.5000000000000000);
+             const real_t tmp_kernel_op_107 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_99) - tmp_kernel_op_17*(tmp_kernel_op_101 + tmp_kernel_op_25));
+             const real_t tmp_kernel_op_108 = tmp_kernel_op_106*tmp_kernel_op_107*1.0;
+             const real_t tmp_kernel_op_109 = tmp_kernel_op_1*tmp_kernel_op_105 + tmp_kernel_op_102*tmp_kernel_op_108;
+             const real_t tmp_kernel_op_110 = tmp_kernel_op_101*tmp_kernel_op_104;
+             const real_t tmp_kernel_op_111 = tmp_kernel_op_100*tmp_kernel_op_106*tmp_kernel_op_107*1.0 - tmp_kernel_op_110*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_112 = tmp_kernel_op_101*tmp_kernel_op_106*tmp_kernel_op_107*tmp_kernel_op_99*1.0 + tmp_kernel_op_105*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_113 = tmp_kernel_op_1*tmp_kernel_op_110 - tmp_kernel_op_101*tmp_kernel_op_108*tmp_kernel_op_99;
+             const real_t tmp_kernel_op_114 = 1.0 / (tmp_kernel_op_109*tmp_kernel_op_111 + tmp_kernel_op_112*tmp_kernel_op_113);
+             const real_t tmp_kernel_op_118 = tmp_kernel_op_114*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_117 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_117);
+             const real_t tmp_kernel_op_119 = tmp_kernel_op_114*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_117 + jac_affine_inv_1_0_GRAY*tmp_kernel_op_117);
+             const real_t tmp_kernel_op_120 = tmp_kernel_op_109*tmp_kernel_op_118 + tmp_kernel_op_112*tmp_kernel_op_119;
+             const real_t tmp_kernel_op_121 = -tmp_kernel_op_113;
+             const real_t tmp_kernel_op_122 = tmp_kernel_op_111*tmp_kernel_op_119 + tmp_kernel_op_118*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_123 = tmp_kernel_op_2*0.59999999999999998 + tmp_kernel_op_4*0.20000000000000001 + tmp_kernel_op_43;
+             const real_t tmp_kernel_op_124 = (tmp_kernel_op_123*tmp_kernel_op_123);
+             const real_t tmp_kernel_op_125 = tmp_kernel_op_10*0.20000000000000001 + tmp_kernel_op_46 + tmp_kernel_op_8*0.59999999999999998;
+             const real_t tmp_kernel_op_126 = (tmp_kernel_op_125*tmp_kernel_op_125);
+             const real_t tmp_kernel_op_127 = tmp_kernel_op_124 + tmp_kernel_op_126;
+             const real_t tmp_kernel_op_128 = pow(tmp_kernel_op_127, -0.50000000000000000)*tmp_kernel_op_51;
+             const real_t tmp_kernel_op_129 = tmp_kernel_op_123*tmp_kernel_op_128;
+             const real_t tmp_kernel_op_130 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_123) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_125);
+             const real_t tmp_kernel_op_131 = pow(tmp_kernel_op_127, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_132 = tmp_kernel_op_131*(radRayVertex + tmp_kernel_op_130*tmp_kernel_op_50);
+             const real_t tmp_kernel_op_133 = tmp_kernel_op_125*tmp_kernel_op_128;
+             const real_t tmp_kernel_op_134 = tmp_kernel_op_131*(radRayVertex + tmp_kernel_op_130*tmp_kernel_op_50);
+             const real_t tmp_kernel_op_135 = tmp_kernel_op_123*tmp_kernel_op_125;
+             const real_t tmp_kernel_op_136 = abs_det_jac_affine_GRAY*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_129 - tmp_kernel_op_126*tmp_kernel_op_132)*(tmp_kernel_op_124*tmp_kernel_op_134 + tmp_kernel_op_133*tmp_kernel_op_16) - (tmp_kernel_op_0*tmp_kernel_op_133 + tmp_kernel_op_132*tmp_kernel_op_135)*(tmp_kernel_op_129*tmp_kernel_op_16 - tmp_kernel_op_134*tmp_kernel_op_135));
+             const real_t tmp_kernel_op_137 = p_affine_0_0 + tmp_kernel_op_3*0.20000000000000001 + tmp_kernel_op_5*0.20000000000000001;
+             const real_t tmp_kernel_op_138 = (tmp_kernel_op_137*tmp_kernel_op_137);
+             const real_t tmp_kernel_op_139 = p_affine_0_1 + tmp_kernel_op_11*0.20000000000000001 + tmp_kernel_op_9*0.20000000000000001;
+             const real_t tmp_kernel_op_140 = (tmp_kernel_op_139*tmp_kernel_op_139);
+             const real_t tmp_kernel_op_141 = tmp_kernel_op_138 + tmp_kernel_op_140;
+             const real_t tmp_kernel_op_142 = pow(tmp_kernel_op_141, -0.50000000000000000)*tmp_kernel_op_21;
+             const real_t tmp_kernel_op_143 = tmp_kernel_op_137*tmp_kernel_op_142;
+             const real_t tmp_kernel_op_144 = pow(tmp_kernel_op_141, -1.5000000000000000);
+             const real_t tmp_kernel_op_145 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_137 + tmp_kernel_op_26) - tmp_kernel_op_17*(tmp_kernel_op_139 + tmp_kernel_op_25));
+             const real_t tmp_kernel_op_146 = tmp_kernel_op_144*tmp_kernel_op_145*1.0;
+             const real_t tmp_kernel_op_147 = tmp_kernel_op_1*tmp_kernel_op_143 + tmp_kernel_op_140*tmp_kernel_op_146;
+             const real_t tmp_kernel_op_148 = tmp_kernel_op_139*tmp_kernel_op_142;
+             const real_t tmp_kernel_op_149 = tmp_kernel_op_138*tmp_kernel_op_144*tmp_kernel_op_145*1.0 - tmp_kernel_op_148*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_150 = tmp_kernel_op_137*tmp_kernel_op_139*tmp_kernel_op_144*tmp_kernel_op_145*1.0 + tmp_kernel_op_143*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_151 = tmp_kernel_op_1*tmp_kernel_op_148 - tmp_kernel_op_137*tmp_kernel_op_139*tmp_kernel_op_146;
+             const real_t tmp_kernel_op_152 = 1.0 / (tmp_kernel_op_147*tmp_kernel_op_149 + tmp_kernel_op_150*tmp_kernel_op_151);
+             const real_t tmp_kernel_op_156 = tmp_kernel_op_152*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_155 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_155);
+             const real_t tmp_kernel_op_157 = tmp_kernel_op_152*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_155 + jac_affine_inv_1_0_GRAY*tmp_kernel_op_155);
+             const real_t tmp_kernel_op_158 = tmp_kernel_op_147*tmp_kernel_op_156 + tmp_kernel_op_150*tmp_kernel_op_157;
+             const real_t tmp_kernel_op_159 = -tmp_kernel_op_151;
+             const real_t tmp_kernel_op_160 = tmp_kernel_op_149*tmp_kernel_op_157 + tmp_kernel_op_156*tmp_kernel_op_159;
+             const real_t tmp_kernel_op_161 = tmp_kernel_op_2*0.20000000000000001 + tmp_kernel_op_4*0.20000000000000001 + tmp_kernel_op_43;
+             const real_t tmp_kernel_op_162 = (tmp_kernel_op_161*tmp_kernel_op_161);
+             const real_t tmp_kernel_op_163 = tmp_kernel_op_10*0.20000000000000001 + tmp_kernel_op_46 + tmp_kernel_op_8*0.20000000000000001;
+             const real_t tmp_kernel_op_164 = (tmp_kernel_op_163*tmp_kernel_op_163);
+             const real_t tmp_kernel_op_165 = tmp_kernel_op_162 + tmp_kernel_op_164;
+             const real_t tmp_kernel_op_166 = pow(tmp_kernel_op_165, -0.50000000000000000)*tmp_kernel_op_51;
+             const real_t tmp_kernel_op_167 = tmp_kernel_op_161*tmp_kernel_op_166;
+             const real_t tmp_kernel_op_168 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_161) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_163);
+             const real_t tmp_kernel_op_169 = pow(tmp_kernel_op_165, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_170 = tmp_kernel_op_169*(radRayVertex + tmp_kernel_op_168*tmp_kernel_op_50);
+             const real_t tmp_kernel_op_171 = tmp_kernel_op_163*tmp_kernel_op_166;
+             const real_t tmp_kernel_op_172 = tmp_kernel_op_169*(radRayVertex + tmp_kernel_op_168*tmp_kernel_op_50);
+             const real_t tmp_kernel_op_173 = tmp_kernel_op_161*tmp_kernel_op_163;
+             const real_t tmp_kernel_op_174 = abs_det_jac_affine_GRAY*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_167 - tmp_kernel_op_164*tmp_kernel_op_170)*(tmp_kernel_op_16*tmp_kernel_op_171 + tmp_kernel_op_162*tmp_kernel_op_172) - (tmp_kernel_op_0*tmp_kernel_op_171 + tmp_kernel_op_170*tmp_kernel_op_173)*(tmp_kernel_op_16*tmp_kernel_op_167 - tmp_kernel_op_172*tmp_kernel_op_173));
+             const real_t tmp_kernel_op_175 = tmp_kernel_op_34*(tmp_kernel_op_35 - 1.0);
+             const real_t tmp_kernel_op_176 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_175;
+             const real_t tmp_kernel_op_177 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_175;
+             const real_t tmp_kernel_op_178 = tmp_kernel_op_176*tmp_kernel_op_29 + tmp_kernel_op_177*tmp_kernel_op_32;
+             const real_t tmp_kernel_op_179 = tmp_kernel_op_176*tmp_kernel_op_41 + tmp_kernel_op_177*tmp_kernel_op_31;
+             const real_t tmp_kernel_op_180 = tmp_kernel_op_76*(tmp_kernel_op_77 - 1.0);
+             const real_t tmp_kernel_op_181 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_180;
+             const real_t tmp_kernel_op_182 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_180;
+             const real_t tmp_kernel_op_183 = tmp_kernel_op_181*tmp_kernel_op_71 + tmp_kernel_op_182*tmp_kernel_op_74;
+             const real_t tmp_kernel_op_184 = tmp_kernel_op_181*tmp_kernel_op_83 + tmp_kernel_op_182*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_185 = tmp_kernel_op_114*(tmp_kernel_op_115 - 1.0);
+             const real_t tmp_kernel_op_186 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_185;
+             const real_t tmp_kernel_op_187 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_185;
+             const real_t tmp_kernel_op_188 = tmp_kernel_op_109*tmp_kernel_op_186 + tmp_kernel_op_112*tmp_kernel_op_187;
+             const real_t tmp_kernel_op_189 = tmp_kernel_op_111*tmp_kernel_op_187 + tmp_kernel_op_121*tmp_kernel_op_186;
+             const real_t tmp_kernel_op_190 = tmp_kernel_op_152*(tmp_kernel_op_153 - 1.0);
+             const real_t tmp_kernel_op_191 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_190;
+             const real_t tmp_kernel_op_192 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_190;
+             const real_t tmp_kernel_op_193 = tmp_kernel_op_147*tmp_kernel_op_191 + tmp_kernel_op_150*tmp_kernel_op_192;
+             const real_t tmp_kernel_op_194 = tmp_kernel_op_149*tmp_kernel_op_192 + tmp_kernel_op_159*tmp_kernel_op_191;
+             const real_t tmp_kernel_op_195 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_188 + tmp_kernel_op_122*tmp_kernel_op_189) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_193 + tmp_kernel_op_160*tmp_kernel_op_194) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_40 + tmp_kernel_op_179*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_82 + tmp_kernel_op_184*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_196 = tmp_kernel_op_34*(tmp_kernel_op_36 - 1.0);
+             const real_t tmp_kernel_op_197 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_196;
+             const real_t tmp_kernel_op_198 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_196;
+             const real_t tmp_kernel_op_199 = tmp_kernel_op_197*tmp_kernel_op_29 + tmp_kernel_op_198*tmp_kernel_op_32;
+             const real_t tmp_kernel_op_200 = tmp_kernel_op_197*tmp_kernel_op_41 + tmp_kernel_op_198*tmp_kernel_op_31;
+             const real_t tmp_kernel_op_201 = tmp_kernel_op_76*(tmp_kernel_op_78 - 1.0);
+             const real_t tmp_kernel_op_202 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_201;
+             const real_t tmp_kernel_op_203 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_201;
+             const real_t tmp_kernel_op_204 = tmp_kernel_op_202*tmp_kernel_op_71 + tmp_kernel_op_203*tmp_kernel_op_74;
+             const real_t tmp_kernel_op_205 = tmp_kernel_op_202*tmp_kernel_op_83 + tmp_kernel_op_203*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_206 = tmp_kernel_op_114*(tmp_kernel_op_116 - 1.0);
+             const real_t tmp_kernel_op_207 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_206;
+             const real_t tmp_kernel_op_208 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_206;
+             const real_t tmp_kernel_op_209 = tmp_kernel_op_109*tmp_kernel_op_207 + tmp_kernel_op_112*tmp_kernel_op_208;
+             const real_t tmp_kernel_op_210 = tmp_kernel_op_111*tmp_kernel_op_208 + tmp_kernel_op_121*tmp_kernel_op_207;
+             const real_t tmp_kernel_op_211 = tmp_kernel_op_152*(tmp_kernel_op_154 - 1.0);
+             const real_t tmp_kernel_op_212 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_211;
+             const real_t tmp_kernel_op_213 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_211;
+             const real_t tmp_kernel_op_214 = tmp_kernel_op_147*tmp_kernel_op_212 + tmp_kernel_op_150*tmp_kernel_op_213;
+             const real_t tmp_kernel_op_215 = tmp_kernel_op_149*tmp_kernel_op_213 + tmp_kernel_op_159*tmp_kernel_op_212;
+             const real_t tmp_kernel_op_216 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_209 + tmp_kernel_op_122*tmp_kernel_op_210) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_214 + tmp_kernel_op_160*tmp_kernel_op_215) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_40 + tmp_kernel_op_200*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_82 + tmp_kernel_op_205*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_219 = tmp_kernel_op_34*(tmp_kernel_op_217 + tmp_kernel_op_218);
+             const real_t tmp_kernel_op_222 = tmp_kernel_op_34*(tmp_kernel_op_220 + tmp_kernel_op_221);
+             const real_t tmp_kernel_op_223 = tmp_kernel_op_219*tmp_kernel_op_29 + tmp_kernel_op_222*tmp_kernel_op_32;
+             const real_t tmp_kernel_op_224 = tmp_kernel_op_219*tmp_kernel_op_41 + tmp_kernel_op_222*tmp_kernel_op_31;
+             const real_t tmp_kernel_op_227 = tmp_kernel_op_76*(tmp_kernel_op_225 + tmp_kernel_op_226);
+             const real_t tmp_kernel_op_230 = tmp_kernel_op_76*(tmp_kernel_op_228 + tmp_kernel_op_229);
+             const real_t tmp_kernel_op_231 = tmp_kernel_op_227*tmp_kernel_op_71 + tmp_kernel_op_230*tmp_kernel_op_74;
+             const real_t tmp_kernel_op_232 = tmp_kernel_op_227*tmp_kernel_op_83 + tmp_kernel_op_230*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_235 = tmp_kernel_op_114*(tmp_kernel_op_233 + tmp_kernel_op_234);
+             const real_t tmp_kernel_op_238 = tmp_kernel_op_114*(tmp_kernel_op_236 + tmp_kernel_op_237);
+             const real_t tmp_kernel_op_239 = tmp_kernel_op_109*tmp_kernel_op_235 + tmp_kernel_op_112*tmp_kernel_op_238;
+             const real_t tmp_kernel_op_240 = tmp_kernel_op_111*tmp_kernel_op_238 + tmp_kernel_op_121*tmp_kernel_op_235;
+             const real_t tmp_kernel_op_243 = tmp_kernel_op_152*(tmp_kernel_op_241 + tmp_kernel_op_242);
+             const real_t tmp_kernel_op_246 = tmp_kernel_op_152*(tmp_kernel_op_244 + tmp_kernel_op_245);
+             const real_t tmp_kernel_op_247 = tmp_kernel_op_147*tmp_kernel_op_243 + tmp_kernel_op_150*tmp_kernel_op_246;
+             const real_t tmp_kernel_op_248 = tmp_kernel_op_149*tmp_kernel_op_246 + tmp_kernel_op_159*tmp_kernel_op_243;
+             const real_t tmp_kernel_op_249 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_239 + tmp_kernel_op_122*tmp_kernel_op_240) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_247 + tmp_kernel_op_160*tmp_kernel_op_248) + tmp_kernel_op_60*(tmp_kernel_op_223*tmp_kernel_op_40 + tmp_kernel_op_224*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_231*tmp_kernel_op_82 + tmp_kernel_op_232*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_251 = tmp_kernel_op_34*(jac_affine_inv_1_1_GRAY*tmp_kernel_op_250 - tmp_kernel_op_217);
+             const real_t tmp_kernel_op_252 = tmp_kernel_op_34*(jac_affine_inv_1_0_GRAY*tmp_kernel_op_250 - tmp_kernel_op_220);
+             const real_t tmp_kernel_op_253 = tmp_kernel_op_251*tmp_kernel_op_29 + tmp_kernel_op_252*tmp_kernel_op_32;
+             const real_t tmp_kernel_op_254 = tmp_kernel_op_251*tmp_kernel_op_41 + tmp_kernel_op_252*tmp_kernel_op_31;
+             const real_t tmp_kernel_op_256 = tmp_kernel_op_76*(jac_affine_inv_1_1_GRAY*tmp_kernel_op_255 - tmp_kernel_op_225);
+             const real_t tmp_kernel_op_257 = tmp_kernel_op_76*(jac_affine_inv_1_0_GRAY*tmp_kernel_op_255 - tmp_kernel_op_228);
+             const real_t tmp_kernel_op_258 = tmp_kernel_op_256*tmp_kernel_op_71 + tmp_kernel_op_257*tmp_kernel_op_74;
+             const real_t tmp_kernel_op_259 = tmp_kernel_op_256*tmp_kernel_op_83 + tmp_kernel_op_257*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_261 = tmp_kernel_op_114*(jac_affine_inv_1_1_GRAY*tmp_kernel_op_260 - tmp_kernel_op_233);
+             const real_t tmp_kernel_op_262 = tmp_kernel_op_114*(jac_affine_inv_1_0_GRAY*tmp_kernel_op_260 - tmp_kernel_op_236);
+             const real_t tmp_kernel_op_263 = tmp_kernel_op_109*tmp_kernel_op_261 + tmp_kernel_op_112*tmp_kernel_op_262;
+             const real_t tmp_kernel_op_264 = tmp_kernel_op_111*tmp_kernel_op_262 + tmp_kernel_op_121*tmp_kernel_op_261;
+             const real_t tmp_kernel_op_266 = tmp_kernel_op_152*(jac_affine_inv_1_1_GRAY*tmp_kernel_op_265 - tmp_kernel_op_241);
+             const real_t tmp_kernel_op_267 = tmp_kernel_op_152*(jac_affine_inv_1_0_GRAY*tmp_kernel_op_265 - tmp_kernel_op_244);
+             const real_t tmp_kernel_op_268 = tmp_kernel_op_147*tmp_kernel_op_266 + tmp_kernel_op_150*tmp_kernel_op_267;
+             const real_t tmp_kernel_op_269 = tmp_kernel_op_149*tmp_kernel_op_267 + tmp_kernel_op_159*tmp_kernel_op_266;
+             const real_t tmp_kernel_op_270 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_263 + tmp_kernel_op_122*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_268 + tmp_kernel_op_160*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_253*tmp_kernel_op_40 + tmp_kernel_op_254*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_258*tmp_kernel_op_82 + tmp_kernel_op_259*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_272 = tmp_kernel_op_34*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_271 - tmp_kernel_op_218);
+             const real_t tmp_kernel_op_273 = tmp_kernel_op_34*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_271 - tmp_kernel_op_221);
+             const real_t tmp_kernel_op_274 = tmp_kernel_op_272*tmp_kernel_op_29 + tmp_kernel_op_273*tmp_kernel_op_32;
+             const real_t tmp_kernel_op_275 = tmp_kernel_op_272*tmp_kernel_op_41 + tmp_kernel_op_273*tmp_kernel_op_31;
+             const real_t tmp_kernel_op_277 = tmp_kernel_op_76*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_276 - tmp_kernel_op_226);
+             const real_t tmp_kernel_op_278 = tmp_kernel_op_76*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_276 - tmp_kernel_op_229);
+             const real_t tmp_kernel_op_279 = tmp_kernel_op_277*tmp_kernel_op_71 + tmp_kernel_op_278*tmp_kernel_op_74;
+             const real_t tmp_kernel_op_280 = tmp_kernel_op_277*tmp_kernel_op_83 + tmp_kernel_op_278*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_282 = tmp_kernel_op_114*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_281 - tmp_kernel_op_234);
+             const real_t tmp_kernel_op_283 = tmp_kernel_op_114*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_281 - tmp_kernel_op_237);
+             const real_t tmp_kernel_op_284 = tmp_kernel_op_109*tmp_kernel_op_282 + tmp_kernel_op_112*tmp_kernel_op_283;
+             const real_t tmp_kernel_op_285 = tmp_kernel_op_111*tmp_kernel_op_283 + tmp_kernel_op_121*tmp_kernel_op_282;
+             const real_t tmp_kernel_op_287 = tmp_kernel_op_152*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_286 - tmp_kernel_op_242);
+             const real_t tmp_kernel_op_288 = tmp_kernel_op_152*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_286 - tmp_kernel_op_245);
+             const real_t tmp_kernel_op_289 = tmp_kernel_op_147*tmp_kernel_op_287 + tmp_kernel_op_150*tmp_kernel_op_288;
+             const real_t tmp_kernel_op_290 = tmp_kernel_op_149*tmp_kernel_op_288 + tmp_kernel_op_159*tmp_kernel_op_287;
+             const real_t tmp_kernel_op_291 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_284 + tmp_kernel_op_122*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_289 + tmp_kernel_op_160*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_274*tmp_kernel_op_40 + tmp_kernel_op_275*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_279*tmp_kernel_op_82 + tmp_kernel_op_280*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_292 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_209 + tmp_kernel_op_189*tmp_kernel_op_210) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_214 + tmp_kernel_op_194*tmp_kernel_op_215) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_199 + tmp_kernel_op_179*tmp_kernel_op_200) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_204 + tmp_kernel_op_184*tmp_kernel_op_205);
+             const real_t tmp_kernel_op_293 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_239 + tmp_kernel_op_189*tmp_kernel_op_240) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_247 + tmp_kernel_op_194*tmp_kernel_op_248) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_223 + tmp_kernel_op_179*tmp_kernel_op_224) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_231 + tmp_kernel_op_184*tmp_kernel_op_232);
+             const real_t tmp_kernel_op_294 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_263 + tmp_kernel_op_189*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_268 + tmp_kernel_op_194*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_253 + tmp_kernel_op_179*tmp_kernel_op_254) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_258 + tmp_kernel_op_184*tmp_kernel_op_259);
+             const real_t tmp_kernel_op_295 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_284 + tmp_kernel_op_189*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_289 + tmp_kernel_op_194*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_274 + tmp_kernel_op_179*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_279 + tmp_kernel_op_184*tmp_kernel_op_280);
+             const real_t tmp_kernel_op_296 = tmp_kernel_op_136*(tmp_kernel_op_209*tmp_kernel_op_239 + tmp_kernel_op_210*tmp_kernel_op_240) + tmp_kernel_op_174*(tmp_kernel_op_214*tmp_kernel_op_247 + tmp_kernel_op_215*tmp_kernel_op_248) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_223 + tmp_kernel_op_200*tmp_kernel_op_224) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_231 + tmp_kernel_op_205*tmp_kernel_op_232);
+             const real_t tmp_kernel_op_297 = tmp_kernel_op_136*(tmp_kernel_op_209*tmp_kernel_op_263 + tmp_kernel_op_210*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_214*tmp_kernel_op_268 + tmp_kernel_op_215*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_253 + tmp_kernel_op_200*tmp_kernel_op_254) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_258 + tmp_kernel_op_205*tmp_kernel_op_259);
+             const real_t tmp_kernel_op_298 = tmp_kernel_op_136*(tmp_kernel_op_209*tmp_kernel_op_284 + tmp_kernel_op_210*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_214*tmp_kernel_op_289 + tmp_kernel_op_215*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_274 + tmp_kernel_op_200*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_279 + tmp_kernel_op_205*tmp_kernel_op_280);
+             const real_t tmp_kernel_op_299 = tmp_kernel_op_136*(tmp_kernel_op_239*tmp_kernel_op_263 + tmp_kernel_op_240*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_247*tmp_kernel_op_268 + tmp_kernel_op_248*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_223*tmp_kernel_op_253 + tmp_kernel_op_224*tmp_kernel_op_254) + tmp_kernel_op_98*(tmp_kernel_op_231*tmp_kernel_op_258 + tmp_kernel_op_232*tmp_kernel_op_259);
+             const real_t tmp_kernel_op_300 = tmp_kernel_op_136*(tmp_kernel_op_239*tmp_kernel_op_284 + tmp_kernel_op_240*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_247*tmp_kernel_op_289 + tmp_kernel_op_248*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_223*tmp_kernel_op_274 + tmp_kernel_op_224*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_231*tmp_kernel_op_279 + tmp_kernel_op_232*tmp_kernel_op_280);
+             const real_t tmp_kernel_op_301 = tmp_kernel_op_136*(tmp_kernel_op_263*tmp_kernel_op_284 + tmp_kernel_op_264*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_268*tmp_kernel_op_289 + tmp_kernel_op_269*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_253*tmp_kernel_op_274 + tmp_kernel_op_254*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_258*tmp_kernel_op_279 + tmp_kernel_op_259*tmp_kernel_op_280);
+             const real_t elMatVec_0 = src_dof_0*(tmp_kernel_op_136*((tmp_kernel_op_120*tmp_kernel_op_120) + (tmp_kernel_op_122*tmp_kernel_op_122)) + tmp_kernel_op_174*((tmp_kernel_op_158*tmp_kernel_op_158) + (tmp_kernel_op_160*tmp_kernel_op_160)) + tmp_kernel_op_60*((tmp_kernel_op_40*tmp_kernel_op_40) + (tmp_kernel_op_42*tmp_kernel_op_42)) + tmp_kernel_op_98*((tmp_kernel_op_82*tmp_kernel_op_82) + (tmp_kernel_op_84*tmp_kernel_op_84))) + src_dof_1*tmp_kernel_op_195 + src_dof_2*tmp_kernel_op_216 + src_dof_3*tmp_kernel_op_249 + src_dof_4*tmp_kernel_op_270 + src_dof_5*tmp_kernel_op_291;
+             const real_t elMatVec_1 = src_dof_0*tmp_kernel_op_195 + src_dof_1*(tmp_kernel_op_136*((tmp_kernel_op_188*tmp_kernel_op_188) + (tmp_kernel_op_189*tmp_kernel_op_189)) + tmp_kernel_op_174*((tmp_kernel_op_193*tmp_kernel_op_193) + (tmp_kernel_op_194*tmp_kernel_op_194)) + tmp_kernel_op_60*((tmp_kernel_op_178*tmp_kernel_op_178) + (tmp_kernel_op_179*tmp_kernel_op_179)) + tmp_kernel_op_98*((tmp_kernel_op_183*tmp_kernel_op_183) + (tmp_kernel_op_184*tmp_kernel_op_184))) + src_dof_2*tmp_kernel_op_292 + src_dof_3*tmp_kernel_op_293 + src_dof_4*tmp_kernel_op_294 + src_dof_5*tmp_kernel_op_295;
+             const real_t elMatVec_2 = src_dof_0*tmp_kernel_op_216 + src_dof_1*tmp_kernel_op_292 + src_dof_2*(tmp_kernel_op_136*((tmp_kernel_op_209*tmp_kernel_op_209) + (tmp_kernel_op_210*tmp_kernel_op_210)) + tmp_kernel_op_174*((tmp_kernel_op_214*tmp_kernel_op_214) + (tmp_kernel_op_215*tmp_kernel_op_215)) + tmp_kernel_op_60*((tmp_kernel_op_199*tmp_kernel_op_199) + (tmp_kernel_op_200*tmp_kernel_op_200)) + tmp_kernel_op_98*((tmp_kernel_op_204*tmp_kernel_op_204) + (tmp_kernel_op_205*tmp_kernel_op_205))) + src_dof_3*tmp_kernel_op_296 + src_dof_4*tmp_kernel_op_297 + src_dof_5*tmp_kernel_op_298;
+             const real_t elMatVec_3 = src_dof_0*tmp_kernel_op_249 + src_dof_1*tmp_kernel_op_293 + src_dof_2*tmp_kernel_op_296 + src_dof_3*(tmp_kernel_op_136*((tmp_kernel_op_239*tmp_kernel_op_239) + (tmp_kernel_op_240*tmp_kernel_op_240)) + tmp_kernel_op_174*((tmp_kernel_op_247*tmp_kernel_op_247) + (tmp_kernel_op_248*tmp_kernel_op_248)) + tmp_kernel_op_60*((tmp_kernel_op_223*tmp_kernel_op_223) + (tmp_kernel_op_224*tmp_kernel_op_224)) + tmp_kernel_op_98*((tmp_kernel_op_231*tmp_kernel_op_231) + (tmp_kernel_op_232*tmp_kernel_op_232))) + src_dof_4*tmp_kernel_op_299 + src_dof_5*tmp_kernel_op_300;
+             const real_t elMatVec_4 = src_dof_0*tmp_kernel_op_270 + src_dof_1*tmp_kernel_op_294 + src_dof_2*tmp_kernel_op_297 + src_dof_3*tmp_kernel_op_299 + src_dof_4*(tmp_kernel_op_136*((tmp_kernel_op_263*tmp_kernel_op_263) + (tmp_kernel_op_264*tmp_kernel_op_264)) + tmp_kernel_op_174*((tmp_kernel_op_268*tmp_kernel_op_268) + (tmp_kernel_op_269*tmp_kernel_op_269)) + tmp_kernel_op_60*((tmp_kernel_op_253*tmp_kernel_op_253) + (tmp_kernel_op_254*tmp_kernel_op_254)) + tmp_kernel_op_98*((tmp_kernel_op_258*tmp_kernel_op_258) + (tmp_kernel_op_259*tmp_kernel_op_259))) + src_dof_5*tmp_kernel_op_301;
+             const real_t elMatVec_5 = src_dof_0*tmp_kernel_op_291 + src_dof_1*tmp_kernel_op_295 + src_dof_2*tmp_kernel_op_298 + src_dof_3*tmp_kernel_op_300 + src_dof_4*tmp_kernel_op_301 + src_dof_5*(tmp_kernel_op_136*((tmp_kernel_op_284*tmp_kernel_op_284) + (tmp_kernel_op_285*tmp_kernel_op_285)) + tmp_kernel_op_174*((tmp_kernel_op_289*tmp_kernel_op_289) + (tmp_kernel_op_290*tmp_kernel_op_290)) + tmp_kernel_op_60*((tmp_kernel_op_274*tmp_kernel_op_274) + (tmp_kernel_op_275*tmp_kernel_op_275)) + tmp_kernel_op_98*((tmp_kernel_op_279*tmp_kernel_op_279) + (tmp_kernel_op_280*tmp_kernel_op_280)));
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       const real_t tmp_moved_constant_0 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_36;
+       const real_t tmp_moved_constant_1 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_35;
+       const real_t tmp_moved_constant_2 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_36;
+       const real_t tmp_moved_constant_3 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_35;
+       const real_t tmp_moved_constant_4 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_78;
+       const real_t tmp_moved_constant_5 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_77;
+       const real_t tmp_moved_constant_6 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_78;
+       const real_t tmp_moved_constant_7 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_77;
+       const real_t tmp_moved_constant_8 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_116;
+       const real_t tmp_moved_constant_9 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_115;
+       const real_t tmp_moved_constant_10 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_116;
+       const real_t tmp_moved_constant_11 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_115;
+       const real_t tmp_moved_constant_12 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_154;
+       const real_t tmp_moved_constant_13 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_153;
+       const real_t tmp_moved_constant_14 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_154;
+       const real_t tmp_moved_constant_15 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_153;
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t tmp_kernel_op_2 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_kernel_op_3 = -tmp_kernel_op_2;
+             const real_t tmp_kernel_op_4 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_kernel_op_5 = -tmp_kernel_op_4;
+             const real_t tmp_kernel_op_6 = p_affine_0_0 + tmp_kernel_op_3*0.33333333333333331 + tmp_kernel_op_5*0.33333333333333331;
+             const real_t tmp_kernel_op_7 = (tmp_kernel_op_6*tmp_kernel_op_6);
+             const real_t tmp_kernel_op_8 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_kernel_op_9 = -tmp_kernel_op_8;
+             const real_t tmp_kernel_op_10 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_kernel_op_11 = -tmp_kernel_op_10;
+             const real_t tmp_kernel_op_12 = p_affine_0_1 + tmp_kernel_op_11*0.33333333333333331 + tmp_kernel_op_9*0.33333333333333331;
+             const real_t tmp_kernel_op_13 = (tmp_kernel_op_12*tmp_kernel_op_12);
+             const real_t tmp_kernel_op_14 = tmp_kernel_op_13 + tmp_kernel_op_7;
+             const real_t tmp_kernel_op_22 = pow(tmp_kernel_op_14, -0.50000000000000000)*tmp_kernel_op_21;
+             const real_t tmp_kernel_op_23 = tmp_kernel_op_22*tmp_kernel_op_6;
+             const real_t tmp_kernel_op_24 = pow(tmp_kernel_op_14, -1.5000000000000000);
+             const real_t tmp_kernel_op_27 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_6) - tmp_kernel_op_17*(tmp_kernel_op_12 + tmp_kernel_op_25));
+             const real_t tmp_kernel_op_28 = tmp_kernel_op_24*tmp_kernel_op_27*1.0;
+             const real_t tmp_kernel_op_29 = tmp_kernel_op_1*tmp_kernel_op_23 + tmp_kernel_op_13*tmp_kernel_op_28;
+             const real_t tmp_kernel_op_30 = tmp_kernel_op_12*tmp_kernel_op_22;
+             const real_t tmp_kernel_op_31 = -tmp_kernel_op_17*tmp_kernel_op_30 + tmp_kernel_op_24*tmp_kernel_op_27*tmp_kernel_op_7*1.0;
+             const real_t tmp_kernel_op_32 = tmp_kernel_op_12*tmp_kernel_op_24*tmp_kernel_op_27*tmp_kernel_op_6*1.0 + tmp_kernel_op_17*tmp_kernel_op_23;
+             const real_t tmp_kernel_op_33 = tmp_kernel_op_1*tmp_kernel_op_30 - tmp_kernel_op_12*tmp_kernel_op_28*tmp_kernel_op_6;
+             const real_t tmp_kernel_op_34 = 1.0 / (tmp_kernel_op_29*tmp_kernel_op_31 + tmp_kernel_op_32*tmp_kernel_op_33);
+             const real_t tmp_kernel_op_38 = tmp_kernel_op_34*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_37 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_37);
+             const real_t tmp_kernel_op_39 = tmp_kernel_op_34*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_37 + jac_affine_inv_1_0_BLUE*tmp_kernel_op_37);
+             const real_t tmp_kernel_op_40 = tmp_kernel_op_29*tmp_kernel_op_38 + tmp_kernel_op_32*tmp_kernel_op_39;
+             const real_t tmp_kernel_op_41 = -tmp_kernel_op_33;
+             const real_t tmp_kernel_op_42 = tmp_kernel_op_31*tmp_kernel_op_39 + tmp_kernel_op_38*tmp_kernel_op_41;
+             const real_t tmp_kernel_op_43 = -p_affine_0_0;
+             const real_t tmp_kernel_op_44 = tmp_kernel_op_2*0.33333333333333331 + tmp_kernel_op_4*0.33333333333333331 + tmp_kernel_op_43;
+             const real_t tmp_kernel_op_45 = (tmp_kernel_op_44*tmp_kernel_op_44);
+             const real_t tmp_kernel_op_46 = -p_affine_0_1;
+             const real_t tmp_kernel_op_47 = tmp_kernel_op_10*0.33333333333333331 + tmp_kernel_op_46 + tmp_kernel_op_8*0.33333333333333331;
+             const real_t tmp_kernel_op_48 = (tmp_kernel_op_47*tmp_kernel_op_47);
+             const real_t tmp_kernel_op_49 = tmp_kernel_op_45 + tmp_kernel_op_48;
+             const real_t tmp_kernel_op_52 = pow(tmp_kernel_op_49, -0.50000000000000000)*tmp_kernel_op_51;
+             const real_t tmp_kernel_op_53 = tmp_kernel_op_44*tmp_kernel_op_52;
+             const real_t tmp_kernel_op_54 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_44) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_47);
+             const real_t tmp_kernel_op_55 = pow(tmp_kernel_op_49, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_56 = tmp_kernel_op_55*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_54);
+             const real_t tmp_kernel_op_57 = tmp_kernel_op_47*tmp_kernel_op_52;
+             const real_t tmp_kernel_op_58 = tmp_kernel_op_55*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_54);
+             const real_t tmp_kernel_op_59 = tmp_kernel_op_44*tmp_kernel_op_47;
+             const real_t tmp_kernel_op_60 = abs_det_jac_affine_BLUE*-0.28125*abs((tmp_kernel_op_0*tmp_kernel_op_53 - tmp_kernel_op_48*tmp_kernel_op_56)*(tmp_kernel_op_16*tmp_kernel_op_57 + tmp_kernel_op_45*tmp_kernel_op_58) - (tmp_kernel_op_0*tmp_kernel_op_57 + tmp_kernel_op_56*tmp_kernel_op_59)*(tmp_kernel_op_16*tmp_kernel_op_53 - tmp_kernel_op_58*tmp_kernel_op_59));
+             const real_t tmp_kernel_op_61 = p_affine_0_0 + tmp_kernel_op_3*0.20000000000000001 + tmp_kernel_op_5*0.59999999999999998;
+             const real_t tmp_kernel_op_62 = (tmp_kernel_op_61*tmp_kernel_op_61);
+             const real_t tmp_kernel_op_63 = p_affine_0_1 + tmp_kernel_op_11*0.59999999999999998 + tmp_kernel_op_9*0.20000000000000001;
+             const real_t tmp_kernel_op_64 = (tmp_kernel_op_63*tmp_kernel_op_63);
+             const real_t tmp_kernel_op_65 = tmp_kernel_op_62 + tmp_kernel_op_64;
+             const real_t tmp_kernel_op_66 = tmp_kernel_op_21*pow(tmp_kernel_op_65, -0.50000000000000000);
+             const real_t tmp_kernel_op_67 = tmp_kernel_op_61*tmp_kernel_op_66;
+             const real_t tmp_kernel_op_68 = pow(tmp_kernel_op_65, -1.5000000000000000);
+             const real_t tmp_kernel_op_69 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_61) - tmp_kernel_op_17*(tmp_kernel_op_25 + tmp_kernel_op_63));
+             const real_t tmp_kernel_op_70 = tmp_kernel_op_68*tmp_kernel_op_69*1.0;
+             const real_t tmp_kernel_op_71 = tmp_kernel_op_1*tmp_kernel_op_67 + tmp_kernel_op_64*tmp_kernel_op_70;
+             const real_t tmp_kernel_op_72 = tmp_kernel_op_63*tmp_kernel_op_66;
+             const real_t tmp_kernel_op_73 = -tmp_kernel_op_17*tmp_kernel_op_72 + tmp_kernel_op_62*tmp_kernel_op_68*tmp_kernel_op_69*1.0;
+             const real_t tmp_kernel_op_74 = tmp_kernel_op_17*tmp_kernel_op_67 + tmp_kernel_op_61*tmp_kernel_op_63*tmp_kernel_op_68*tmp_kernel_op_69*1.0;
+             const real_t tmp_kernel_op_75 = tmp_kernel_op_1*tmp_kernel_op_72 - tmp_kernel_op_61*tmp_kernel_op_63*tmp_kernel_op_70;
+             const real_t tmp_kernel_op_76 = 1.0 / (tmp_kernel_op_71*tmp_kernel_op_73 + tmp_kernel_op_74*tmp_kernel_op_75);
+             const real_t tmp_kernel_op_80 = tmp_kernel_op_76*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_79 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_79);
+             const real_t tmp_kernel_op_81 = tmp_kernel_op_76*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_79 + jac_affine_inv_1_0_BLUE*tmp_kernel_op_79);
+             const real_t tmp_kernel_op_82 = tmp_kernel_op_71*tmp_kernel_op_80 + tmp_kernel_op_74*tmp_kernel_op_81;
+             const real_t tmp_kernel_op_83 = -tmp_kernel_op_75;
+             const real_t tmp_kernel_op_84 = tmp_kernel_op_73*tmp_kernel_op_81 + tmp_kernel_op_80*tmp_kernel_op_83;
+             const real_t tmp_kernel_op_85 = tmp_kernel_op_2*0.20000000000000001 + tmp_kernel_op_4*0.59999999999999998 + tmp_kernel_op_43;
+             const real_t tmp_kernel_op_86 = (tmp_kernel_op_85*tmp_kernel_op_85);
+             const real_t tmp_kernel_op_87 = tmp_kernel_op_10*0.59999999999999998 + tmp_kernel_op_46 + tmp_kernel_op_8*0.20000000000000001;
+             const real_t tmp_kernel_op_88 = (tmp_kernel_op_87*tmp_kernel_op_87);
+             const real_t tmp_kernel_op_89 = tmp_kernel_op_86 + tmp_kernel_op_88;
+             const real_t tmp_kernel_op_90 = tmp_kernel_op_51*pow(tmp_kernel_op_89, -0.50000000000000000);
+             const real_t tmp_kernel_op_91 = tmp_kernel_op_85*tmp_kernel_op_90;
+             const real_t tmp_kernel_op_92 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_85) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_87);
+             const real_t tmp_kernel_op_93 = pow(tmp_kernel_op_89, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_94 = tmp_kernel_op_93*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_92);
+             const real_t tmp_kernel_op_95 = tmp_kernel_op_87*tmp_kernel_op_90;
+             const real_t tmp_kernel_op_96 = tmp_kernel_op_93*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_92);
+             const real_t tmp_kernel_op_97 = tmp_kernel_op_85*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_98 = abs_det_jac_affine_BLUE*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_91 - tmp_kernel_op_88*tmp_kernel_op_94)*(tmp_kernel_op_16*tmp_kernel_op_95 + tmp_kernel_op_86*tmp_kernel_op_96) - (tmp_kernel_op_0*tmp_kernel_op_95 + tmp_kernel_op_94*tmp_kernel_op_97)*(tmp_kernel_op_16*tmp_kernel_op_91 - tmp_kernel_op_96*tmp_kernel_op_97));
+             const real_t tmp_kernel_op_99 = p_affine_0_0 + tmp_kernel_op_3*0.59999999999999998 + tmp_kernel_op_5*0.20000000000000001;
+             const real_t tmp_kernel_op_100 = (tmp_kernel_op_99*tmp_kernel_op_99);
+             const real_t tmp_kernel_op_101 = p_affine_0_1 + tmp_kernel_op_11*0.20000000000000001 + tmp_kernel_op_9*0.59999999999999998;
+             const real_t tmp_kernel_op_102 = (tmp_kernel_op_101*tmp_kernel_op_101);
+             const real_t tmp_kernel_op_103 = tmp_kernel_op_100 + tmp_kernel_op_102;
+             const real_t tmp_kernel_op_104 = pow(tmp_kernel_op_103, -0.50000000000000000)*tmp_kernel_op_21;
+             const real_t tmp_kernel_op_105 = tmp_kernel_op_104*tmp_kernel_op_99;
+             const real_t tmp_kernel_op_106 = pow(tmp_kernel_op_103, -1.5000000000000000);
+             const real_t tmp_kernel_op_107 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_99) - tmp_kernel_op_17*(tmp_kernel_op_101 + tmp_kernel_op_25));
+             const real_t tmp_kernel_op_108 = tmp_kernel_op_106*tmp_kernel_op_107*1.0;
+             const real_t tmp_kernel_op_109 = tmp_kernel_op_1*tmp_kernel_op_105 + tmp_kernel_op_102*tmp_kernel_op_108;
+             const real_t tmp_kernel_op_110 = tmp_kernel_op_101*tmp_kernel_op_104;
+             const real_t tmp_kernel_op_111 = tmp_kernel_op_100*tmp_kernel_op_106*tmp_kernel_op_107*1.0 - tmp_kernel_op_110*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_112 = tmp_kernel_op_101*tmp_kernel_op_106*tmp_kernel_op_107*tmp_kernel_op_99*1.0 + tmp_kernel_op_105*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_113 = tmp_kernel_op_1*tmp_kernel_op_110 - tmp_kernel_op_101*tmp_kernel_op_108*tmp_kernel_op_99;
+             const real_t tmp_kernel_op_114 = 1.0 / (tmp_kernel_op_109*tmp_kernel_op_111 + tmp_kernel_op_112*tmp_kernel_op_113);
+             const real_t tmp_kernel_op_118 = tmp_kernel_op_114*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_117 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_117);
+             const real_t tmp_kernel_op_119 = tmp_kernel_op_114*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_117 + jac_affine_inv_1_0_BLUE*tmp_kernel_op_117);
+             const real_t tmp_kernel_op_120 = tmp_kernel_op_109*tmp_kernel_op_118 + tmp_kernel_op_112*tmp_kernel_op_119;
+             const real_t tmp_kernel_op_121 = -tmp_kernel_op_113;
+             const real_t tmp_kernel_op_122 = tmp_kernel_op_111*tmp_kernel_op_119 + tmp_kernel_op_118*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_123 = tmp_kernel_op_2*0.59999999999999998 + tmp_kernel_op_4*0.20000000000000001 + tmp_kernel_op_43;
+             const real_t tmp_kernel_op_124 = (tmp_kernel_op_123*tmp_kernel_op_123);
+             const real_t tmp_kernel_op_125 = tmp_kernel_op_10*0.20000000000000001 + tmp_kernel_op_46 + tmp_kernel_op_8*0.59999999999999998;
+             const real_t tmp_kernel_op_126 = (tmp_kernel_op_125*tmp_kernel_op_125);
+             const real_t tmp_kernel_op_127 = tmp_kernel_op_124 + tmp_kernel_op_126;
+             const real_t tmp_kernel_op_128 = pow(tmp_kernel_op_127, -0.50000000000000000)*tmp_kernel_op_51;
+             const real_t tmp_kernel_op_129 = tmp_kernel_op_123*tmp_kernel_op_128;
+             const real_t tmp_kernel_op_130 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_123) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_125);
+             const real_t tmp_kernel_op_131 = pow(tmp_kernel_op_127, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_132 = tmp_kernel_op_131*(radRayVertex + tmp_kernel_op_130*tmp_kernel_op_50);
+             const real_t tmp_kernel_op_133 = tmp_kernel_op_125*tmp_kernel_op_128;
+             const real_t tmp_kernel_op_134 = tmp_kernel_op_131*(radRayVertex + tmp_kernel_op_130*tmp_kernel_op_50);
+             const real_t tmp_kernel_op_135 = tmp_kernel_op_123*tmp_kernel_op_125;
+             const real_t tmp_kernel_op_136 = abs_det_jac_affine_BLUE*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_129 - tmp_kernel_op_126*tmp_kernel_op_132)*(tmp_kernel_op_124*tmp_kernel_op_134 + tmp_kernel_op_133*tmp_kernel_op_16) - (tmp_kernel_op_0*tmp_kernel_op_133 + tmp_kernel_op_132*tmp_kernel_op_135)*(tmp_kernel_op_129*tmp_kernel_op_16 - tmp_kernel_op_134*tmp_kernel_op_135));
+             const real_t tmp_kernel_op_137 = p_affine_0_0 + tmp_kernel_op_3*0.20000000000000001 + tmp_kernel_op_5*0.20000000000000001;
+             const real_t tmp_kernel_op_138 = (tmp_kernel_op_137*tmp_kernel_op_137);
+             const real_t tmp_kernel_op_139 = p_affine_0_1 + tmp_kernel_op_11*0.20000000000000001 + tmp_kernel_op_9*0.20000000000000001;
+             const real_t tmp_kernel_op_140 = (tmp_kernel_op_139*tmp_kernel_op_139);
+             const real_t tmp_kernel_op_141 = tmp_kernel_op_138 + tmp_kernel_op_140;
+             const real_t tmp_kernel_op_142 = pow(tmp_kernel_op_141, -0.50000000000000000)*tmp_kernel_op_21;
+             const real_t tmp_kernel_op_143 = tmp_kernel_op_137*tmp_kernel_op_142;
+             const real_t tmp_kernel_op_144 = pow(tmp_kernel_op_141, -1.5000000000000000);
+             const real_t tmp_kernel_op_145 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_137 + tmp_kernel_op_26) - tmp_kernel_op_17*(tmp_kernel_op_139 + tmp_kernel_op_25));
+             const real_t tmp_kernel_op_146 = tmp_kernel_op_144*tmp_kernel_op_145*1.0;
+             const real_t tmp_kernel_op_147 = tmp_kernel_op_1*tmp_kernel_op_143 + tmp_kernel_op_140*tmp_kernel_op_146;
+             const real_t tmp_kernel_op_148 = tmp_kernel_op_139*tmp_kernel_op_142;
+             const real_t tmp_kernel_op_149 = tmp_kernel_op_138*tmp_kernel_op_144*tmp_kernel_op_145*1.0 - tmp_kernel_op_148*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_150 = tmp_kernel_op_137*tmp_kernel_op_139*tmp_kernel_op_144*tmp_kernel_op_145*1.0 + tmp_kernel_op_143*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_151 = tmp_kernel_op_1*tmp_kernel_op_148 - tmp_kernel_op_137*tmp_kernel_op_139*tmp_kernel_op_146;
+             const real_t tmp_kernel_op_152 = 1.0 / (tmp_kernel_op_147*tmp_kernel_op_149 + tmp_kernel_op_150*tmp_kernel_op_151);
+             const real_t tmp_kernel_op_156 = tmp_kernel_op_152*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_155 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_155);
+             const real_t tmp_kernel_op_157 = tmp_kernel_op_152*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_155 + jac_affine_inv_1_0_BLUE*tmp_kernel_op_155);
+             const real_t tmp_kernel_op_158 = tmp_kernel_op_147*tmp_kernel_op_156 + tmp_kernel_op_150*tmp_kernel_op_157;
+             const real_t tmp_kernel_op_159 = -tmp_kernel_op_151;
+             const real_t tmp_kernel_op_160 = tmp_kernel_op_149*tmp_kernel_op_157 + tmp_kernel_op_156*tmp_kernel_op_159;
+             const real_t tmp_kernel_op_161 = tmp_kernel_op_2*0.20000000000000001 + tmp_kernel_op_4*0.20000000000000001 + tmp_kernel_op_43;
+             const real_t tmp_kernel_op_162 = (tmp_kernel_op_161*tmp_kernel_op_161);
+             const real_t tmp_kernel_op_163 = tmp_kernel_op_10*0.20000000000000001 + tmp_kernel_op_46 + tmp_kernel_op_8*0.20000000000000001;
+             const real_t tmp_kernel_op_164 = (tmp_kernel_op_163*tmp_kernel_op_163);
+             const real_t tmp_kernel_op_165 = tmp_kernel_op_162 + tmp_kernel_op_164;
+             const real_t tmp_kernel_op_166 = pow(tmp_kernel_op_165, -0.50000000000000000)*tmp_kernel_op_51;
+             const real_t tmp_kernel_op_167 = tmp_kernel_op_161*tmp_kernel_op_166;
+             const real_t tmp_kernel_op_168 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_161) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_163);
+             const real_t tmp_kernel_op_169 = pow(tmp_kernel_op_165, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_170 = tmp_kernel_op_169*(radRayVertex + tmp_kernel_op_168*tmp_kernel_op_50);
+             const real_t tmp_kernel_op_171 = tmp_kernel_op_163*tmp_kernel_op_166;
+             const real_t tmp_kernel_op_172 = tmp_kernel_op_169*(radRayVertex + tmp_kernel_op_168*tmp_kernel_op_50);
+             const real_t tmp_kernel_op_173 = tmp_kernel_op_161*tmp_kernel_op_163;
+             const real_t tmp_kernel_op_174 = abs_det_jac_affine_BLUE*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_167 - tmp_kernel_op_164*tmp_kernel_op_170)*(tmp_kernel_op_16*tmp_kernel_op_171 + tmp_kernel_op_162*tmp_kernel_op_172) - (tmp_kernel_op_0*tmp_kernel_op_171 + tmp_kernel_op_170*tmp_kernel_op_173)*(tmp_kernel_op_16*tmp_kernel_op_167 - tmp_kernel_op_172*tmp_kernel_op_173));
+             const real_t tmp_kernel_op_175 = tmp_kernel_op_34*(tmp_kernel_op_35 - 1.0);
+             const real_t tmp_kernel_op_176 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_175;
+             const real_t tmp_kernel_op_177 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_175;
+             const real_t tmp_kernel_op_178 = tmp_kernel_op_176*tmp_kernel_op_29 + tmp_kernel_op_177*tmp_kernel_op_32;
+             const real_t tmp_kernel_op_179 = tmp_kernel_op_176*tmp_kernel_op_41 + tmp_kernel_op_177*tmp_kernel_op_31;
+             const real_t tmp_kernel_op_180 = tmp_kernel_op_76*(tmp_kernel_op_77 - 1.0);
+             const real_t tmp_kernel_op_181 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_180;
+             const real_t tmp_kernel_op_182 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_180;
+             const real_t tmp_kernel_op_183 = tmp_kernel_op_181*tmp_kernel_op_71 + tmp_kernel_op_182*tmp_kernel_op_74;
+             const real_t tmp_kernel_op_184 = tmp_kernel_op_181*tmp_kernel_op_83 + tmp_kernel_op_182*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_185 = tmp_kernel_op_114*(tmp_kernel_op_115 - 1.0);
+             const real_t tmp_kernel_op_186 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_185;
+             const real_t tmp_kernel_op_187 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_185;
+             const real_t tmp_kernel_op_188 = tmp_kernel_op_109*tmp_kernel_op_186 + tmp_kernel_op_112*tmp_kernel_op_187;
+             const real_t tmp_kernel_op_189 = tmp_kernel_op_111*tmp_kernel_op_187 + tmp_kernel_op_121*tmp_kernel_op_186;
+             const real_t tmp_kernel_op_190 = tmp_kernel_op_152*(tmp_kernel_op_153 - 1.0);
+             const real_t tmp_kernel_op_191 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_190;
+             const real_t tmp_kernel_op_192 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_190;
+             const real_t tmp_kernel_op_193 = tmp_kernel_op_147*tmp_kernel_op_191 + tmp_kernel_op_150*tmp_kernel_op_192;
+             const real_t tmp_kernel_op_194 = tmp_kernel_op_149*tmp_kernel_op_192 + tmp_kernel_op_159*tmp_kernel_op_191;
+             const real_t tmp_kernel_op_195 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_188 + tmp_kernel_op_122*tmp_kernel_op_189) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_193 + tmp_kernel_op_160*tmp_kernel_op_194) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_40 + tmp_kernel_op_179*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_82 + tmp_kernel_op_184*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_196 = tmp_kernel_op_34*(tmp_kernel_op_36 - 1.0);
+             const real_t tmp_kernel_op_197 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_196;
+             const real_t tmp_kernel_op_198 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_196;
+             const real_t tmp_kernel_op_199 = tmp_kernel_op_197*tmp_kernel_op_29 + tmp_kernel_op_198*tmp_kernel_op_32;
+             const real_t tmp_kernel_op_200 = tmp_kernel_op_197*tmp_kernel_op_41 + tmp_kernel_op_198*tmp_kernel_op_31;
+             const real_t tmp_kernel_op_201 = tmp_kernel_op_76*(tmp_kernel_op_78 - 1.0);
+             const real_t tmp_kernel_op_202 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_201;
+             const real_t tmp_kernel_op_203 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_201;
+             const real_t tmp_kernel_op_204 = tmp_kernel_op_202*tmp_kernel_op_71 + tmp_kernel_op_203*tmp_kernel_op_74;
+             const real_t tmp_kernel_op_205 = tmp_kernel_op_202*tmp_kernel_op_83 + tmp_kernel_op_203*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_206 = tmp_kernel_op_114*(tmp_kernel_op_116 - 1.0);
+             const real_t tmp_kernel_op_207 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_206;
+             const real_t tmp_kernel_op_208 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_206;
+             const real_t tmp_kernel_op_209 = tmp_kernel_op_109*tmp_kernel_op_207 + tmp_kernel_op_112*tmp_kernel_op_208;
+             const real_t tmp_kernel_op_210 = tmp_kernel_op_111*tmp_kernel_op_208 + tmp_kernel_op_121*tmp_kernel_op_207;
+             const real_t tmp_kernel_op_211 = tmp_kernel_op_152*(tmp_kernel_op_154 - 1.0);
+             const real_t tmp_kernel_op_212 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_211;
+             const real_t tmp_kernel_op_213 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_211;
+             const real_t tmp_kernel_op_214 = tmp_kernel_op_147*tmp_kernel_op_212 + tmp_kernel_op_150*tmp_kernel_op_213;
+             const real_t tmp_kernel_op_215 = tmp_kernel_op_149*tmp_kernel_op_213 + tmp_kernel_op_159*tmp_kernel_op_212;
+             const real_t tmp_kernel_op_216 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_209 + tmp_kernel_op_122*tmp_kernel_op_210) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_214 + tmp_kernel_op_160*tmp_kernel_op_215) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_40 + tmp_kernel_op_200*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_82 + tmp_kernel_op_205*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_219 = tmp_kernel_op_34*(tmp_moved_constant_0 + tmp_moved_constant_1);
+             const real_t tmp_kernel_op_222 = tmp_kernel_op_34*(tmp_moved_constant_2 + tmp_moved_constant_3);
+             const real_t tmp_kernel_op_223 = tmp_kernel_op_219*tmp_kernel_op_29 + tmp_kernel_op_222*tmp_kernel_op_32;
+             const real_t tmp_kernel_op_224 = tmp_kernel_op_219*tmp_kernel_op_41 + tmp_kernel_op_222*tmp_kernel_op_31;
+             const real_t tmp_kernel_op_227 = tmp_kernel_op_76*(tmp_moved_constant_4 + tmp_moved_constant_5);
+             const real_t tmp_kernel_op_230 = tmp_kernel_op_76*(tmp_moved_constant_6 + tmp_moved_constant_7);
+             const real_t tmp_kernel_op_231 = tmp_kernel_op_227*tmp_kernel_op_71 + tmp_kernel_op_230*tmp_kernel_op_74;
+             const real_t tmp_kernel_op_232 = tmp_kernel_op_227*tmp_kernel_op_83 + tmp_kernel_op_230*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_235 = tmp_kernel_op_114*(tmp_moved_constant_8 + tmp_moved_constant_9);
+             const real_t tmp_kernel_op_238 = tmp_kernel_op_114*(tmp_moved_constant_10 + tmp_moved_constant_11);
+             const real_t tmp_kernel_op_239 = tmp_kernel_op_109*tmp_kernel_op_235 + tmp_kernel_op_112*tmp_kernel_op_238;
+             const real_t tmp_kernel_op_240 = tmp_kernel_op_111*tmp_kernel_op_238 + tmp_kernel_op_121*tmp_kernel_op_235;
+             const real_t tmp_kernel_op_243 = tmp_kernel_op_152*(tmp_moved_constant_12 + tmp_moved_constant_13);
+             const real_t tmp_kernel_op_246 = tmp_kernel_op_152*(tmp_moved_constant_14 + tmp_moved_constant_15);
+             const real_t tmp_kernel_op_247 = tmp_kernel_op_147*tmp_kernel_op_243 + tmp_kernel_op_150*tmp_kernel_op_246;
+             const real_t tmp_kernel_op_248 = tmp_kernel_op_149*tmp_kernel_op_246 + tmp_kernel_op_159*tmp_kernel_op_243;
+             const real_t tmp_kernel_op_249 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_239 + tmp_kernel_op_122*tmp_kernel_op_240) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_247 + tmp_kernel_op_160*tmp_kernel_op_248) + tmp_kernel_op_60*(tmp_kernel_op_223*tmp_kernel_op_40 + tmp_kernel_op_224*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_231*tmp_kernel_op_82 + tmp_kernel_op_232*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_251 = tmp_kernel_op_34*(jac_affine_inv_1_1_BLUE*tmp_kernel_op_250 - tmp_moved_constant_0);
+             const real_t tmp_kernel_op_252 = tmp_kernel_op_34*(jac_affine_inv_1_0_BLUE*tmp_kernel_op_250 - tmp_moved_constant_2);
+             const real_t tmp_kernel_op_253 = tmp_kernel_op_251*tmp_kernel_op_29 + tmp_kernel_op_252*tmp_kernel_op_32;
+             const real_t tmp_kernel_op_254 = tmp_kernel_op_251*tmp_kernel_op_41 + tmp_kernel_op_252*tmp_kernel_op_31;
+             const real_t tmp_kernel_op_256 = tmp_kernel_op_76*(jac_affine_inv_1_1_BLUE*tmp_kernel_op_255 - tmp_moved_constant_4);
+             const real_t tmp_kernel_op_257 = tmp_kernel_op_76*(jac_affine_inv_1_0_BLUE*tmp_kernel_op_255 - tmp_moved_constant_6);
+             const real_t tmp_kernel_op_258 = tmp_kernel_op_256*tmp_kernel_op_71 + tmp_kernel_op_257*tmp_kernel_op_74;
+             const real_t tmp_kernel_op_259 = tmp_kernel_op_256*tmp_kernel_op_83 + tmp_kernel_op_257*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_261 = tmp_kernel_op_114*(jac_affine_inv_1_1_BLUE*tmp_kernel_op_260 - tmp_moved_constant_8);
+             const real_t tmp_kernel_op_262 = tmp_kernel_op_114*(jac_affine_inv_1_0_BLUE*tmp_kernel_op_260 - tmp_moved_constant_10);
+             const real_t tmp_kernel_op_263 = tmp_kernel_op_109*tmp_kernel_op_261 + tmp_kernel_op_112*tmp_kernel_op_262;
+             const real_t tmp_kernel_op_264 = tmp_kernel_op_111*tmp_kernel_op_262 + tmp_kernel_op_121*tmp_kernel_op_261;
+             const real_t tmp_kernel_op_266 = tmp_kernel_op_152*(jac_affine_inv_1_1_BLUE*tmp_kernel_op_265 - tmp_moved_constant_12);
+             const real_t tmp_kernel_op_267 = tmp_kernel_op_152*(jac_affine_inv_1_0_BLUE*tmp_kernel_op_265 - tmp_moved_constant_14);
+             const real_t tmp_kernel_op_268 = tmp_kernel_op_147*tmp_kernel_op_266 + tmp_kernel_op_150*tmp_kernel_op_267;
+             const real_t tmp_kernel_op_269 = tmp_kernel_op_149*tmp_kernel_op_267 + tmp_kernel_op_159*tmp_kernel_op_266;
+             const real_t tmp_kernel_op_270 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_263 + tmp_kernel_op_122*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_268 + tmp_kernel_op_160*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_253*tmp_kernel_op_40 + tmp_kernel_op_254*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_258*tmp_kernel_op_82 + tmp_kernel_op_259*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_272 = tmp_kernel_op_34*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_271 - tmp_moved_constant_1);
+             const real_t tmp_kernel_op_273 = tmp_kernel_op_34*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_271 - tmp_moved_constant_3);
+             const real_t tmp_kernel_op_274 = tmp_kernel_op_272*tmp_kernel_op_29 + tmp_kernel_op_273*tmp_kernel_op_32;
+             const real_t tmp_kernel_op_275 = tmp_kernel_op_272*tmp_kernel_op_41 + tmp_kernel_op_273*tmp_kernel_op_31;
+             const real_t tmp_kernel_op_277 = tmp_kernel_op_76*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_276 - tmp_moved_constant_5);
+             const real_t tmp_kernel_op_278 = tmp_kernel_op_76*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_276 - tmp_moved_constant_7);
+             const real_t tmp_kernel_op_279 = tmp_kernel_op_277*tmp_kernel_op_71 + tmp_kernel_op_278*tmp_kernel_op_74;
+             const real_t tmp_kernel_op_280 = tmp_kernel_op_277*tmp_kernel_op_83 + tmp_kernel_op_278*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_282 = tmp_kernel_op_114*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_281 - tmp_moved_constant_9);
+             const real_t tmp_kernel_op_283 = tmp_kernel_op_114*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_281 - tmp_moved_constant_11);
+             const real_t tmp_kernel_op_284 = tmp_kernel_op_109*tmp_kernel_op_282 + tmp_kernel_op_112*tmp_kernel_op_283;
+             const real_t tmp_kernel_op_285 = tmp_kernel_op_111*tmp_kernel_op_283 + tmp_kernel_op_121*tmp_kernel_op_282;
+             const real_t tmp_kernel_op_287 = tmp_kernel_op_152*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_286 - tmp_moved_constant_13);
+             const real_t tmp_kernel_op_288 = tmp_kernel_op_152*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_286 - tmp_moved_constant_15);
+             const real_t tmp_kernel_op_289 = tmp_kernel_op_147*tmp_kernel_op_287 + tmp_kernel_op_150*tmp_kernel_op_288;
+             const real_t tmp_kernel_op_290 = tmp_kernel_op_149*tmp_kernel_op_288 + tmp_kernel_op_159*tmp_kernel_op_287;
+             const real_t tmp_kernel_op_291 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_284 + tmp_kernel_op_122*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_289 + tmp_kernel_op_160*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_274*tmp_kernel_op_40 + tmp_kernel_op_275*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_279*tmp_kernel_op_82 + tmp_kernel_op_280*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_292 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_209 + tmp_kernel_op_189*tmp_kernel_op_210) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_214 + tmp_kernel_op_194*tmp_kernel_op_215) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_199 + tmp_kernel_op_179*tmp_kernel_op_200) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_204 + tmp_kernel_op_184*tmp_kernel_op_205);
+             const real_t tmp_kernel_op_293 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_239 + tmp_kernel_op_189*tmp_kernel_op_240) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_247 + tmp_kernel_op_194*tmp_kernel_op_248) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_223 + tmp_kernel_op_179*tmp_kernel_op_224) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_231 + tmp_kernel_op_184*tmp_kernel_op_232);
+             const real_t tmp_kernel_op_294 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_263 + tmp_kernel_op_189*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_268 + tmp_kernel_op_194*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_253 + tmp_kernel_op_179*tmp_kernel_op_254) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_258 + tmp_kernel_op_184*tmp_kernel_op_259);
+             const real_t tmp_kernel_op_295 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_284 + tmp_kernel_op_189*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_289 + tmp_kernel_op_194*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_274 + tmp_kernel_op_179*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_279 + tmp_kernel_op_184*tmp_kernel_op_280);
+             const real_t tmp_kernel_op_296 = tmp_kernel_op_136*(tmp_kernel_op_209*tmp_kernel_op_239 + tmp_kernel_op_210*tmp_kernel_op_240) + tmp_kernel_op_174*(tmp_kernel_op_214*tmp_kernel_op_247 + tmp_kernel_op_215*tmp_kernel_op_248) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_223 + tmp_kernel_op_200*tmp_kernel_op_224) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_231 + tmp_kernel_op_205*tmp_kernel_op_232);
+             const real_t tmp_kernel_op_297 = tmp_kernel_op_136*(tmp_kernel_op_209*tmp_kernel_op_263 + tmp_kernel_op_210*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_214*tmp_kernel_op_268 + tmp_kernel_op_215*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_253 + tmp_kernel_op_200*tmp_kernel_op_254) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_258 + tmp_kernel_op_205*tmp_kernel_op_259);
+             const real_t tmp_kernel_op_298 = tmp_kernel_op_136*(tmp_kernel_op_209*tmp_kernel_op_284 + tmp_kernel_op_210*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_214*tmp_kernel_op_289 + tmp_kernel_op_215*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_274 + tmp_kernel_op_200*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_279 + tmp_kernel_op_205*tmp_kernel_op_280);
+             const real_t tmp_kernel_op_299 = tmp_kernel_op_136*(tmp_kernel_op_239*tmp_kernel_op_263 + tmp_kernel_op_240*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_247*tmp_kernel_op_268 + tmp_kernel_op_248*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_223*tmp_kernel_op_253 + tmp_kernel_op_224*tmp_kernel_op_254) + tmp_kernel_op_98*(tmp_kernel_op_231*tmp_kernel_op_258 + tmp_kernel_op_232*tmp_kernel_op_259);
+             const real_t tmp_kernel_op_300 = tmp_kernel_op_136*(tmp_kernel_op_239*tmp_kernel_op_284 + tmp_kernel_op_240*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_247*tmp_kernel_op_289 + tmp_kernel_op_248*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_223*tmp_kernel_op_274 + tmp_kernel_op_224*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_231*tmp_kernel_op_279 + tmp_kernel_op_232*tmp_kernel_op_280);
+             const real_t tmp_kernel_op_301 = tmp_kernel_op_136*(tmp_kernel_op_263*tmp_kernel_op_284 + tmp_kernel_op_264*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_268*tmp_kernel_op_289 + tmp_kernel_op_269*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_253*tmp_kernel_op_274 + tmp_kernel_op_254*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_258*tmp_kernel_op_279 + tmp_kernel_op_259*tmp_kernel_op_280);
+             const real_t elMatVec_0 = src_dof_0*(tmp_kernel_op_136*((tmp_kernel_op_120*tmp_kernel_op_120) + (tmp_kernel_op_122*tmp_kernel_op_122)) + tmp_kernel_op_174*((tmp_kernel_op_158*tmp_kernel_op_158) + (tmp_kernel_op_160*tmp_kernel_op_160)) + tmp_kernel_op_60*((tmp_kernel_op_40*tmp_kernel_op_40) + (tmp_kernel_op_42*tmp_kernel_op_42)) + tmp_kernel_op_98*((tmp_kernel_op_82*tmp_kernel_op_82) + (tmp_kernel_op_84*tmp_kernel_op_84))) + src_dof_1*tmp_kernel_op_195 + src_dof_2*tmp_kernel_op_216 + src_dof_3*tmp_kernel_op_249 + src_dof_4*tmp_kernel_op_270 + src_dof_5*tmp_kernel_op_291;
+             const real_t elMatVec_1 = src_dof_0*tmp_kernel_op_195 + src_dof_1*(tmp_kernel_op_136*((tmp_kernel_op_188*tmp_kernel_op_188) + (tmp_kernel_op_189*tmp_kernel_op_189)) + tmp_kernel_op_174*((tmp_kernel_op_193*tmp_kernel_op_193) + (tmp_kernel_op_194*tmp_kernel_op_194)) + tmp_kernel_op_60*((tmp_kernel_op_178*tmp_kernel_op_178) + (tmp_kernel_op_179*tmp_kernel_op_179)) + tmp_kernel_op_98*((tmp_kernel_op_183*tmp_kernel_op_183) + (tmp_kernel_op_184*tmp_kernel_op_184))) + src_dof_2*tmp_kernel_op_292 + src_dof_3*tmp_kernel_op_293 + src_dof_4*tmp_kernel_op_294 + src_dof_5*tmp_kernel_op_295;
+             const real_t elMatVec_2 = src_dof_0*tmp_kernel_op_216 + src_dof_1*tmp_kernel_op_292 + src_dof_2*(tmp_kernel_op_136*((tmp_kernel_op_209*tmp_kernel_op_209) + (tmp_kernel_op_210*tmp_kernel_op_210)) + tmp_kernel_op_174*((tmp_kernel_op_214*tmp_kernel_op_214) + (tmp_kernel_op_215*tmp_kernel_op_215)) + tmp_kernel_op_60*((tmp_kernel_op_199*tmp_kernel_op_199) + (tmp_kernel_op_200*tmp_kernel_op_200)) + tmp_kernel_op_98*((tmp_kernel_op_204*tmp_kernel_op_204) + (tmp_kernel_op_205*tmp_kernel_op_205))) + src_dof_3*tmp_kernel_op_296 + src_dof_4*tmp_kernel_op_297 + src_dof_5*tmp_kernel_op_298;
+             const real_t elMatVec_3 = src_dof_0*tmp_kernel_op_249 + src_dof_1*tmp_kernel_op_293 + src_dof_2*tmp_kernel_op_296 + src_dof_3*(tmp_kernel_op_136*((tmp_kernel_op_239*tmp_kernel_op_239) + (tmp_kernel_op_240*tmp_kernel_op_240)) + tmp_kernel_op_174*((tmp_kernel_op_247*tmp_kernel_op_247) + (tmp_kernel_op_248*tmp_kernel_op_248)) + tmp_kernel_op_60*((tmp_kernel_op_223*tmp_kernel_op_223) + (tmp_kernel_op_224*tmp_kernel_op_224)) + tmp_kernel_op_98*((tmp_kernel_op_231*tmp_kernel_op_231) + (tmp_kernel_op_232*tmp_kernel_op_232))) + src_dof_4*tmp_kernel_op_299 + src_dof_5*tmp_kernel_op_300;
+             const real_t elMatVec_4 = src_dof_0*tmp_kernel_op_270 + src_dof_1*tmp_kernel_op_294 + src_dof_2*tmp_kernel_op_297 + src_dof_3*tmp_kernel_op_299 + src_dof_4*(tmp_kernel_op_136*((tmp_kernel_op_263*tmp_kernel_op_263) + (tmp_kernel_op_264*tmp_kernel_op_264)) + tmp_kernel_op_174*((tmp_kernel_op_268*tmp_kernel_op_268) + (tmp_kernel_op_269*tmp_kernel_op_269)) + tmp_kernel_op_60*((tmp_kernel_op_253*tmp_kernel_op_253) + (tmp_kernel_op_254*tmp_kernel_op_254)) + tmp_kernel_op_98*((tmp_kernel_op_258*tmp_kernel_op_258) + (tmp_kernel_op_259*tmp_kernel_op_259))) + src_dof_5*tmp_kernel_op_301;
+             const real_t elMatVec_5 = src_dof_0*tmp_kernel_op_291 + src_dof_1*tmp_kernel_op_295 + src_dof_2*tmp_kernel_op_298 + src_dof_3*tmp_kernel_op_300 + src_dof_4*tmp_kernel_op_301 + src_dof_5*(tmp_kernel_op_136*((tmp_kernel_op_284*tmp_kernel_op_284) + (tmp_kernel_op_285*tmp_kernel_op_285)) + tmp_kernel_op_174*((tmp_kernel_op_289*tmp_kernel_op_289) + (tmp_kernel_op_290*tmp_kernel_op_290)) + tmp_kernel_op_60*((tmp_kernel_op_274*tmp_kernel_op_274) + (tmp_kernel_op_275*tmp_kernel_op_275)) + tmp_kernel_op_98*((tmp_kernel_op_279*tmp_kernel_op_279) + (tmp_kernel_op_280*tmp_kernel_op_280)));
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..289e89c0720a603b7a88ebf5aacf3720d5c9a81b
--- /dev/null
+++ b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
@@ -0,0 +1,629 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseDiffusionAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseDiffusionAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_kernel_op_0 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_kernel_op_1 = -tmp_kernel_op_0;
+       const real_t tmp_kernel_op_15 = rayVertex_0 - refVertex_0;
+       const real_t tmp_kernel_op_16 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_kernel_op_17 = -tmp_kernel_op_16;
+       const real_t tmp_kernel_op_18 = rayVertex_1 - refVertex_1;
+       const real_t tmp_kernel_op_19 = radRayVertex - radRefVertex;
+       const real_t tmp_kernel_op_20 = -tmp_kernel_op_19*1.0 / (-tmp_kernel_op_1*tmp_kernel_op_15 + tmp_kernel_op_17*tmp_kernel_op_18);
+       const real_t tmp_kernel_op_21 = tmp_kernel_op_20*1.0;
+       const real_t tmp_kernel_op_25 = -rayVertex_1;
+       const real_t tmp_kernel_op_26 = -rayVertex_0;
+       const real_t tmp_kernel_op_35 = 1.3333333333333333;
+       const real_t tmp_kernel_op_36 = 1.3333333333333333;
+       const real_t tmp_kernel_op_37 = tmp_kernel_op_35 + tmp_kernel_op_36 - 3.0;
+       const real_t tmp_kernel_op_48 = tmp_kernel_op_19*1.0 / (tmp_kernel_op_0*tmp_kernel_op_15 - tmp_kernel_op_16*tmp_kernel_op_18);
+       const real_t tmp_kernel_op_49 = tmp_kernel_op_48*1.0;
+       const real_t tmp_kernel_op_75 = 0.80000000000000004;
+       const real_t tmp_kernel_op_76 = 2.3999999999999999;
+       const real_t tmp_kernel_op_77 = tmp_kernel_op_75 + tmp_kernel_op_76 - 3.0;
+       const real_t tmp_kernel_op_111 = 2.3999999999999999;
+       const real_t tmp_kernel_op_112 = 0.80000000000000004;
+       const real_t tmp_kernel_op_113 = tmp_kernel_op_111 + tmp_kernel_op_112 - 3.0;
+       const real_t tmp_kernel_op_147 = 0.80000000000000004;
+       const real_t tmp_kernel_op_148 = 0.80000000000000004;
+       const real_t tmp_kernel_op_149 = tmp_kernel_op_147 + tmp_kernel_op_148 - 3.0;
+       const real_t tmp_kernel_op_191 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_36;
+       const real_t tmp_kernel_op_192 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_35;
+       const real_t tmp_kernel_op_194 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_36;
+       const real_t tmp_kernel_op_195 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_35;
+       const real_t tmp_kernel_op_197 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_76;
+       const real_t tmp_kernel_op_198 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_75;
+       const real_t tmp_kernel_op_200 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_76;
+       const real_t tmp_kernel_op_201 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_75;
+       const real_t tmp_kernel_op_203 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_112;
+       const real_t tmp_kernel_op_204 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_111;
+       const real_t tmp_kernel_op_206 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_112;
+       const real_t tmp_kernel_op_207 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_111;
+       const real_t tmp_kernel_op_209 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_148;
+       const real_t tmp_kernel_op_210 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_147;
+       const real_t tmp_kernel_op_212 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_148;
+       const real_t tmp_kernel_op_213 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_147;
+       const real_t tmp_kernel_op_215 = -tmp_kernel_op_35 + 1.3333333333333335;
+       const real_t tmp_kernel_op_218 = -tmp_kernel_op_75 - 0.79999999999999982;
+       const real_t tmp_kernel_op_221 = -tmp_kernel_op_111 + 2.3999999999999999;
+       const real_t tmp_kernel_op_224 = -tmp_kernel_op_147 + 2.3999999999999999;
+       const real_t tmp_kernel_op_227 = -tmp_kernel_op_36 + 1.3333333333333335;
+       const real_t tmp_kernel_op_230 = -tmp_kernel_op_76 + 2.3999999999999999;
+       const real_t tmp_kernel_op_233 = -tmp_kernel_op_112 - 0.79999999999999982;
+       const real_t tmp_kernel_op_236 = -tmp_kernel_op_148 + 2.3999999999999999;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t tmp_kernel_op_2 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_kernel_op_3 = -tmp_kernel_op_2;
+             const real_t tmp_kernel_op_4 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_kernel_op_5 = -tmp_kernel_op_4;
+             const real_t tmp_kernel_op_6 = p_affine_0_0 + tmp_kernel_op_3*0.33333333333333331 + tmp_kernel_op_5*0.33333333333333331;
+             const real_t tmp_kernel_op_7 = (tmp_kernel_op_6*tmp_kernel_op_6);
+             const real_t tmp_kernel_op_8 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_kernel_op_9 = -tmp_kernel_op_8;
+             const real_t tmp_kernel_op_10 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_kernel_op_11 = -tmp_kernel_op_10;
+             const real_t tmp_kernel_op_12 = p_affine_0_1 + tmp_kernel_op_11*0.33333333333333331 + tmp_kernel_op_9*0.33333333333333331;
+             const real_t tmp_kernel_op_13 = (tmp_kernel_op_12*tmp_kernel_op_12);
+             const real_t tmp_kernel_op_14 = tmp_kernel_op_13 + tmp_kernel_op_7;
+             const real_t tmp_kernel_op_22 = pow(tmp_kernel_op_14, -0.50000000000000000)*tmp_kernel_op_21;
+             const real_t tmp_kernel_op_23 = tmp_kernel_op_22*tmp_kernel_op_6;
+             const real_t tmp_kernel_op_24 = pow(tmp_kernel_op_14, -1.5000000000000000);
+             const real_t tmp_kernel_op_27 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_6) - tmp_kernel_op_17*(tmp_kernel_op_12 + tmp_kernel_op_25));
+             const real_t tmp_kernel_op_28 = tmp_kernel_op_24*tmp_kernel_op_27*1.0;
+             const real_t tmp_kernel_op_29 = tmp_kernel_op_1*tmp_kernel_op_23 + tmp_kernel_op_13*tmp_kernel_op_28;
+             const real_t tmp_kernel_op_30 = tmp_kernel_op_12*tmp_kernel_op_22;
+             const real_t tmp_kernel_op_31 = -tmp_kernel_op_17*tmp_kernel_op_30 + tmp_kernel_op_24*tmp_kernel_op_27*tmp_kernel_op_7*1.0;
+             const real_t tmp_kernel_op_32 = tmp_kernel_op_12*tmp_kernel_op_24*tmp_kernel_op_27*tmp_kernel_op_6*1.0 + tmp_kernel_op_17*tmp_kernel_op_23;
+             const real_t tmp_kernel_op_33 = tmp_kernel_op_1*tmp_kernel_op_30 - tmp_kernel_op_12*tmp_kernel_op_28*tmp_kernel_op_6;
+             const real_t tmp_kernel_op_34 = 1.0 / (tmp_kernel_op_29*tmp_kernel_op_31 + tmp_kernel_op_32*tmp_kernel_op_33);
+             const real_t tmp_kernel_op_38 = tmp_kernel_op_34*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_37 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_37);
+             const real_t tmp_kernel_op_39 = tmp_kernel_op_34*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_37 + jac_affine_inv_1_0_GRAY*tmp_kernel_op_37);
+             const real_t tmp_kernel_op_40 = -tmp_kernel_op_33;
+             const real_t tmp_kernel_op_41 = -p_affine_0_0;
+             const real_t tmp_kernel_op_42 = tmp_kernel_op_2*0.33333333333333331 + tmp_kernel_op_4*0.33333333333333331 + tmp_kernel_op_41;
+             const real_t tmp_kernel_op_43 = (tmp_kernel_op_42*tmp_kernel_op_42);
+             const real_t tmp_kernel_op_44 = -p_affine_0_1;
+             const real_t tmp_kernel_op_45 = tmp_kernel_op_10*0.33333333333333331 + tmp_kernel_op_44 + tmp_kernel_op_8*0.33333333333333331;
+             const real_t tmp_kernel_op_46 = (tmp_kernel_op_45*tmp_kernel_op_45);
+             const real_t tmp_kernel_op_47 = tmp_kernel_op_43 + tmp_kernel_op_46;
+             const real_t tmp_kernel_op_50 = pow(tmp_kernel_op_47, -0.50000000000000000)*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_51 = tmp_kernel_op_42*tmp_kernel_op_50;
+             const real_t tmp_kernel_op_52 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_42) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_45);
+             const real_t tmp_kernel_op_53 = pow(tmp_kernel_op_47, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_54 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+             const real_t tmp_kernel_op_55 = tmp_kernel_op_45*tmp_kernel_op_50;
+             const real_t tmp_kernel_op_56 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+             const real_t tmp_kernel_op_57 = tmp_kernel_op_42*tmp_kernel_op_45;
+             const real_t tmp_kernel_op_58 = abs_det_jac_affine_GRAY*-0.28125*abs((tmp_kernel_op_0*tmp_kernel_op_51 - tmp_kernel_op_46*tmp_kernel_op_54)*(tmp_kernel_op_16*tmp_kernel_op_55 + tmp_kernel_op_43*tmp_kernel_op_56) - (tmp_kernel_op_0*tmp_kernel_op_55 + tmp_kernel_op_54*tmp_kernel_op_57)*(tmp_kernel_op_16*tmp_kernel_op_51 - tmp_kernel_op_56*tmp_kernel_op_57));
+             const real_t tmp_kernel_op_59 = p_affine_0_0 + tmp_kernel_op_3*0.20000000000000001 + tmp_kernel_op_5*0.59999999999999998;
+             const real_t tmp_kernel_op_60 = (tmp_kernel_op_59*tmp_kernel_op_59);
+             const real_t tmp_kernel_op_61 = p_affine_0_1 + tmp_kernel_op_11*0.59999999999999998 + tmp_kernel_op_9*0.20000000000000001;
+             const real_t tmp_kernel_op_62 = (tmp_kernel_op_61*tmp_kernel_op_61);
+             const real_t tmp_kernel_op_63 = tmp_kernel_op_60 + tmp_kernel_op_62;
+             const real_t tmp_kernel_op_64 = tmp_kernel_op_21*pow(tmp_kernel_op_63, -0.50000000000000000);
+             const real_t tmp_kernel_op_65 = tmp_kernel_op_59*tmp_kernel_op_64;
+             const real_t tmp_kernel_op_66 = pow(tmp_kernel_op_63, -1.5000000000000000);
+             const real_t tmp_kernel_op_67 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_59) - tmp_kernel_op_17*(tmp_kernel_op_25 + tmp_kernel_op_61));
+             const real_t tmp_kernel_op_68 = tmp_kernel_op_66*tmp_kernel_op_67*1.0;
+             const real_t tmp_kernel_op_69 = tmp_kernel_op_1*tmp_kernel_op_65 + tmp_kernel_op_62*tmp_kernel_op_68;
+             const real_t tmp_kernel_op_70 = tmp_kernel_op_61*tmp_kernel_op_64;
+             const real_t tmp_kernel_op_71 = -tmp_kernel_op_17*tmp_kernel_op_70 + tmp_kernel_op_60*tmp_kernel_op_66*tmp_kernel_op_67*1.0;
+             const real_t tmp_kernel_op_72 = tmp_kernel_op_17*tmp_kernel_op_65 + tmp_kernel_op_59*tmp_kernel_op_61*tmp_kernel_op_66*tmp_kernel_op_67*1.0;
+             const real_t tmp_kernel_op_73 = tmp_kernel_op_1*tmp_kernel_op_70 - tmp_kernel_op_59*tmp_kernel_op_61*tmp_kernel_op_68;
+             const real_t tmp_kernel_op_74 = 1.0 / (tmp_kernel_op_69*tmp_kernel_op_71 + tmp_kernel_op_72*tmp_kernel_op_73);
+             const real_t tmp_kernel_op_78 = tmp_kernel_op_74*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_77 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_77);
+             const real_t tmp_kernel_op_79 = tmp_kernel_op_74*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_77 + jac_affine_inv_1_0_GRAY*tmp_kernel_op_77);
+             const real_t tmp_kernel_op_80 = -tmp_kernel_op_73;
+             const real_t tmp_kernel_op_81 = tmp_kernel_op_2*0.20000000000000001 + tmp_kernel_op_4*0.59999999999999998 + tmp_kernel_op_41;
+             const real_t tmp_kernel_op_82 = (tmp_kernel_op_81*tmp_kernel_op_81);
+             const real_t tmp_kernel_op_83 = tmp_kernel_op_10*0.59999999999999998 + tmp_kernel_op_44 + tmp_kernel_op_8*0.20000000000000001;
+             const real_t tmp_kernel_op_84 = (tmp_kernel_op_83*tmp_kernel_op_83);
+             const real_t tmp_kernel_op_85 = tmp_kernel_op_82 + tmp_kernel_op_84;
+             const real_t tmp_kernel_op_86 = tmp_kernel_op_49*pow(tmp_kernel_op_85, -0.50000000000000000);
+             const real_t tmp_kernel_op_87 = tmp_kernel_op_81*tmp_kernel_op_86;
+             const real_t tmp_kernel_op_88 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_81) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_83);
+             const real_t tmp_kernel_op_89 = pow(tmp_kernel_op_85, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_90 = tmp_kernel_op_89*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_88);
+             const real_t tmp_kernel_op_91 = tmp_kernel_op_83*tmp_kernel_op_86;
+             const real_t tmp_kernel_op_92 = tmp_kernel_op_89*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_88);
+             const real_t tmp_kernel_op_93 = tmp_kernel_op_81*tmp_kernel_op_83;
+             const real_t tmp_kernel_op_94 = abs_det_jac_affine_GRAY*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_87 - tmp_kernel_op_84*tmp_kernel_op_90)*(tmp_kernel_op_16*tmp_kernel_op_91 + tmp_kernel_op_82*tmp_kernel_op_92) - (tmp_kernel_op_0*tmp_kernel_op_91 + tmp_kernel_op_90*tmp_kernel_op_93)*(tmp_kernel_op_16*tmp_kernel_op_87 - tmp_kernel_op_92*tmp_kernel_op_93));
+             const real_t tmp_kernel_op_95 = p_affine_0_0 + tmp_kernel_op_3*0.59999999999999998 + tmp_kernel_op_5*0.20000000000000001;
+             const real_t tmp_kernel_op_96 = (tmp_kernel_op_95*tmp_kernel_op_95);
+             const real_t tmp_kernel_op_97 = p_affine_0_1 + tmp_kernel_op_11*0.20000000000000001 + tmp_kernel_op_9*0.59999999999999998;
+             const real_t tmp_kernel_op_98 = (tmp_kernel_op_97*tmp_kernel_op_97);
+             const real_t tmp_kernel_op_99 = tmp_kernel_op_96 + tmp_kernel_op_98;
+             const real_t tmp_kernel_op_100 = tmp_kernel_op_21*pow(tmp_kernel_op_99, -0.50000000000000000);
+             const real_t tmp_kernel_op_101 = tmp_kernel_op_100*tmp_kernel_op_95;
+             const real_t tmp_kernel_op_102 = pow(tmp_kernel_op_99, -1.5000000000000000);
+             const real_t tmp_kernel_op_103 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_95) - tmp_kernel_op_17*(tmp_kernel_op_25 + tmp_kernel_op_97));
+             const real_t tmp_kernel_op_104 = tmp_kernel_op_102*tmp_kernel_op_103*1.0;
+             const real_t tmp_kernel_op_105 = tmp_kernel_op_1*tmp_kernel_op_101 + tmp_kernel_op_104*tmp_kernel_op_98;
+             const real_t tmp_kernel_op_106 = tmp_kernel_op_100*tmp_kernel_op_97;
+             const real_t tmp_kernel_op_107 = tmp_kernel_op_102*tmp_kernel_op_103*tmp_kernel_op_96*1.0 - tmp_kernel_op_106*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_108 = tmp_kernel_op_101*tmp_kernel_op_17 + tmp_kernel_op_102*tmp_kernel_op_103*tmp_kernel_op_95*tmp_kernel_op_97*1.0;
+             const real_t tmp_kernel_op_109 = tmp_kernel_op_1*tmp_kernel_op_106 - tmp_kernel_op_104*tmp_kernel_op_95*tmp_kernel_op_97;
+             const real_t tmp_kernel_op_110 = 1.0 / (tmp_kernel_op_105*tmp_kernel_op_107 + tmp_kernel_op_108*tmp_kernel_op_109);
+             const real_t tmp_kernel_op_114 = tmp_kernel_op_110*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_113 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_113);
+             const real_t tmp_kernel_op_115 = tmp_kernel_op_110*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_113 + jac_affine_inv_1_0_GRAY*tmp_kernel_op_113);
+             const real_t tmp_kernel_op_116 = -tmp_kernel_op_109;
+             const real_t tmp_kernel_op_117 = tmp_kernel_op_2*0.59999999999999998 + tmp_kernel_op_4*0.20000000000000001 + tmp_kernel_op_41;
+             const real_t tmp_kernel_op_118 = (tmp_kernel_op_117*tmp_kernel_op_117);
+             const real_t tmp_kernel_op_119 = tmp_kernel_op_10*0.20000000000000001 + tmp_kernel_op_44 + tmp_kernel_op_8*0.59999999999999998;
+             const real_t tmp_kernel_op_120 = (tmp_kernel_op_119*tmp_kernel_op_119);
+             const real_t tmp_kernel_op_121 = tmp_kernel_op_118 + tmp_kernel_op_120;
+             const real_t tmp_kernel_op_122 = pow(tmp_kernel_op_121, -0.50000000000000000)*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_123 = tmp_kernel_op_117*tmp_kernel_op_122;
+             const real_t tmp_kernel_op_124 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_117) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_119);
+             const real_t tmp_kernel_op_125 = pow(tmp_kernel_op_121, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_126 = tmp_kernel_op_125*(radRayVertex + tmp_kernel_op_124*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_127 = tmp_kernel_op_119*tmp_kernel_op_122;
+             const real_t tmp_kernel_op_128 = tmp_kernel_op_125*(radRayVertex + tmp_kernel_op_124*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_129 = tmp_kernel_op_117*tmp_kernel_op_119;
+             const real_t tmp_kernel_op_130 = abs_det_jac_affine_GRAY*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_123 - tmp_kernel_op_120*tmp_kernel_op_126)*(tmp_kernel_op_118*tmp_kernel_op_128 + tmp_kernel_op_127*tmp_kernel_op_16) - (tmp_kernel_op_0*tmp_kernel_op_127 + tmp_kernel_op_126*tmp_kernel_op_129)*(tmp_kernel_op_123*tmp_kernel_op_16 - tmp_kernel_op_128*tmp_kernel_op_129));
+             const real_t tmp_kernel_op_131 = p_affine_0_0 + tmp_kernel_op_3*0.20000000000000001 + tmp_kernel_op_5*0.20000000000000001;
+             const real_t tmp_kernel_op_132 = (tmp_kernel_op_131*tmp_kernel_op_131);
+             const real_t tmp_kernel_op_133 = p_affine_0_1 + tmp_kernel_op_11*0.20000000000000001 + tmp_kernel_op_9*0.20000000000000001;
+             const real_t tmp_kernel_op_134 = (tmp_kernel_op_133*tmp_kernel_op_133);
+             const real_t tmp_kernel_op_135 = tmp_kernel_op_132 + tmp_kernel_op_134;
+             const real_t tmp_kernel_op_136 = pow(tmp_kernel_op_135, -0.50000000000000000)*tmp_kernel_op_21;
+             const real_t tmp_kernel_op_137 = tmp_kernel_op_131*tmp_kernel_op_136;
+             const real_t tmp_kernel_op_138 = pow(tmp_kernel_op_135, -1.5000000000000000);
+             const real_t tmp_kernel_op_139 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_131 + tmp_kernel_op_26) - tmp_kernel_op_17*(tmp_kernel_op_133 + tmp_kernel_op_25));
+             const real_t tmp_kernel_op_140 = tmp_kernel_op_138*tmp_kernel_op_139*1.0;
+             const real_t tmp_kernel_op_141 = tmp_kernel_op_1*tmp_kernel_op_137 + tmp_kernel_op_134*tmp_kernel_op_140;
+             const real_t tmp_kernel_op_142 = tmp_kernel_op_133*tmp_kernel_op_136;
+             const real_t tmp_kernel_op_143 = tmp_kernel_op_132*tmp_kernel_op_138*tmp_kernel_op_139*1.0 - tmp_kernel_op_142*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_144 = tmp_kernel_op_131*tmp_kernel_op_133*tmp_kernel_op_138*tmp_kernel_op_139*1.0 + tmp_kernel_op_137*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_145 = tmp_kernel_op_1*tmp_kernel_op_142 - tmp_kernel_op_131*tmp_kernel_op_133*tmp_kernel_op_140;
+             const real_t tmp_kernel_op_146 = 1.0 / (tmp_kernel_op_141*tmp_kernel_op_143 + tmp_kernel_op_144*tmp_kernel_op_145);
+             const real_t tmp_kernel_op_150 = tmp_kernel_op_146*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_149 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_149);
+             const real_t tmp_kernel_op_151 = tmp_kernel_op_146*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_149 + jac_affine_inv_1_0_GRAY*tmp_kernel_op_149);
+             const real_t tmp_kernel_op_152 = -tmp_kernel_op_145;
+             const real_t tmp_kernel_op_153 = tmp_kernel_op_2*0.20000000000000001 + tmp_kernel_op_4*0.20000000000000001 + tmp_kernel_op_41;
+             const real_t tmp_kernel_op_154 = (tmp_kernel_op_153*tmp_kernel_op_153);
+             const real_t tmp_kernel_op_155 = tmp_kernel_op_10*0.20000000000000001 + tmp_kernel_op_44 + tmp_kernel_op_8*0.20000000000000001;
+             const real_t tmp_kernel_op_156 = (tmp_kernel_op_155*tmp_kernel_op_155);
+             const real_t tmp_kernel_op_157 = tmp_kernel_op_154 + tmp_kernel_op_156;
+             const real_t tmp_kernel_op_158 = pow(tmp_kernel_op_157, -0.50000000000000000)*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_159 = tmp_kernel_op_153*tmp_kernel_op_158;
+             const real_t tmp_kernel_op_160 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_153) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_155);
+             const real_t tmp_kernel_op_161 = pow(tmp_kernel_op_157, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_162 = tmp_kernel_op_161*(radRayVertex + tmp_kernel_op_160*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_163 = tmp_kernel_op_155*tmp_kernel_op_158;
+             const real_t tmp_kernel_op_164 = tmp_kernel_op_161*(radRayVertex + tmp_kernel_op_160*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_165 = tmp_kernel_op_153*tmp_kernel_op_155;
+             const real_t tmp_kernel_op_166 = abs_det_jac_affine_GRAY*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_159 - tmp_kernel_op_156*tmp_kernel_op_162)*(tmp_kernel_op_154*tmp_kernel_op_164 + tmp_kernel_op_16*tmp_kernel_op_163) - (tmp_kernel_op_0*tmp_kernel_op_163 + tmp_kernel_op_162*tmp_kernel_op_165)*(tmp_kernel_op_159*tmp_kernel_op_16 - tmp_kernel_op_164*tmp_kernel_op_165));
+             const real_t tmp_kernel_op_167 = tmp_kernel_op_34*(tmp_kernel_op_35 - 1.0);
+             const real_t tmp_kernel_op_168 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_167;
+             const real_t tmp_kernel_op_169 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_167;
+             const real_t tmp_kernel_op_170 = tmp_kernel_op_74*(tmp_kernel_op_75 - 1.0);
+             const real_t tmp_kernel_op_171 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_170;
+             const real_t tmp_kernel_op_172 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_170;
+             const real_t tmp_kernel_op_173 = tmp_kernel_op_110*(tmp_kernel_op_111 - 1.0);
+             const real_t tmp_kernel_op_174 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_173;
+             const real_t tmp_kernel_op_175 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_173;
+             const real_t tmp_kernel_op_176 = tmp_kernel_op_146*(tmp_kernel_op_147 - 1.0);
+             const real_t tmp_kernel_op_177 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_176;
+             const real_t tmp_kernel_op_178 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_176;
+             const real_t tmp_kernel_op_179 = tmp_kernel_op_34*(tmp_kernel_op_36 - 1.0);
+             const real_t tmp_kernel_op_180 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_179;
+             const real_t tmp_kernel_op_181 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_179;
+             const real_t tmp_kernel_op_182 = tmp_kernel_op_74*(tmp_kernel_op_76 - 1.0);
+             const real_t tmp_kernel_op_183 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_182;
+             const real_t tmp_kernel_op_184 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_182;
+             const real_t tmp_kernel_op_185 = tmp_kernel_op_110*(tmp_kernel_op_112 - 1.0);
+             const real_t tmp_kernel_op_186 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_185;
+             const real_t tmp_kernel_op_187 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_185;
+             const real_t tmp_kernel_op_188 = tmp_kernel_op_146*(tmp_kernel_op_148 - 1.0);
+             const real_t tmp_kernel_op_189 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_188;
+             const real_t tmp_kernel_op_190 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_188;
+             const real_t tmp_kernel_op_193 = tmp_kernel_op_34*(tmp_kernel_op_191 + tmp_kernel_op_192);
+             const real_t tmp_kernel_op_196 = tmp_kernel_op_34*(tmp_kernel_op_194 + tmp_kernel_op_195);
+             const real_t tmp_kernel_op_199 = tmp_kernel_op_74*(tmp_kernel_op_197 + tmp_kernel_op_198);
+             const real_t tmp_kernel_op_202 = tmp_kernel_op_74*(tmp_kernel_op_200 + tmp_kernel_op_201);
+             const real_t tmp_kernel_op_205 = tmp_kernel_op_110*(tmp_kernel_op_203 + tmp_kernel_op_204);
+             const real_t tmp_kernel_op_208 = tmp_kernel_op_110*(tmp_kernel_op_206 + tmp_kernel_op_207);
+             const real_t tmp_kernel_op_211 = tmp_kernel_op_146*(tmp_kernel_op_209 + tmp_kernel_op_210);
+             const real_t tmp_kernel_op_214 = tmp_kernel_op_146*(tmp_kernel_op_212 + tmp_kernel_op_213);
+             const real_t tmp_kernel_op_216 = tmp_kernel_op_34*(jac_affine_inv_1_1_GRAY*tmp_kernel_op_215 - tmp_kernel_op_191);
+             const real_t tmp_kernel_op_217 = tmp_kernel_op_34*(jac_affine_inv_1_0_GRAY*tmp_kernel_op_215 - tmp_kernel_op_194);
+             const real_t tmp_kernel_op_219 = tmp_kernel_op_74*(jac_affine_inv_1_1_GRAY*tmp_kernel_op_218 - tmp_kernel_op_197);
+             const real_t tmp_kernel_op_220 = tmp_kernel_op_74*(jac_affine_inv_1_0_GRAY*tmp_kernel_op_218 - tmp_kernel_op_200);
+             const real_t tmp_kernel_op_222 = tmp_kernel_op_110*(jac_affine_inv_1_1_GRAY*tmp_kernel_op_221 - tmp_kernel_op_203);
+             const real_t tmp_kernel_op_223 = tmp_kernel_op_110*(jac_affine_inv_1_0_GRAY*tmp_kernel_op_221 - tmp_kernel_op_206);
+             const real_t tmp_kernel_op_225 = tmp_kernel_op_146*(jac_affine_inv_1_1_GRAY*tmp_kernel_op_224 - tmp_kernel_op_209);
+             const real_t tmp_kernel_op_226 = tmp_kernel_op_146*(jac_affine_inv_1_0_GRAY*tmp_kernel_op_224 - tmp_kernel_op_212);
+             const real_t tmp_kernel_op_228 = tmp_kernel_op_34*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_227 - tmp_kernel_op_192);
+             const real_t tmp_kernel_op_229 = tmp_kernel_op_34*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_227 - tmp_kernel_op_195);
+             const real_t tmp_kernel_op_231 = tmp_kernel_op_74*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_230 - tmp_kernel_op_198);
+             const real_t tmp_kernel_op_232 = tmp_kernel_op_74*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_230 - tmp_kernel_op_201);
+             const real_t tmp_kernel_op_234 = tmp_kernel_op_110*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_233 - tmp_kernel_op_204);
+             const real_t tmp_kernel_op_235 = tmp_kernel_op_110*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_233 - tmp_kernel_op_207);
+             const real_t tmp_kernel_op_237 = tmp_kernel_op_146*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_236 - tmp_kernel_op_210);
+             const real_t tmp_kernel_op_238 = tmp_kernel_op_146*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_236 - tmp_kernel_op_213);
+             const real_t elMatDiag_0 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_114 + tmp_kernel_op_108*tmp_kernel_op_115)*(tmp_kernel_op_105*tmp_kernel_op_114 + tmp_kernel_op_108*tmp_kernel_op_115)) + ((tmp_kernel_op_107*tmp_kernel_op_115 + tmp_kernel_op_114*tmp_kernel_op_116)*(tmp_kernel_op_107*tmp_kernel_op_115 + tmp_kernel_op_114*tmp_kernel_op_116))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_150 + tmp_kernel_op_144*tmp_kernel_op_151)*(tmp_kernel_op_141*tmp_kernel_op_150 + tmp_kernel_op_144*tmp_kernel_op_151)) + ((tmp_kernel_op_143*tmp_kernel_op_151 + tmp_kernel_op_150*tmp_kernel_op_152)*(tmp_kernel_op_143*tmp_kernel_op_151 + tmp_kernel_op_150*tmp_kernel_op_152))) + tmp_kernel_op_58*(((tmp_kernel_op_29*tmp_kernel_op_38 + tmp_kernel_op_32*tmp_kernel_op_39)*(tmp_kernel_op_29*tmp_kernel_op_38 + tmp_kernel_op_32*tmp_kernel_op_39)) + ((tmp_kernel_op_31*tmp_kernel_op_39 + tmp_kernel_op_38*tmp_kernel_op_40)*(tmp_kernel_op_31*tmp_kernel_op_39 + tmp_kernel_op_38*tmp_kernel_op_40))) + tmp_kernel_op_94*(((tmp_kernel_op_69*tmp_kernel_op_78 + tmp_kernel_op_72*tmp_kernel_op_79)*(tmp_kernel_op_69*tmp_kernel_op_78 + tmp_kernel_op_72*tmp_kernel_op_79)) + ((tmp_kernel_op_71*tmp_kernel_op_79 + tmp_kernel_op_78*tmp_kernel_op_80)*(tmp_kernel_op_71*tmp_kernel_op_79 + tmp_kernel_op_78*tmp_kernel_op_80)));
+             const real_t elMatDiag_1 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_174 + tmp_kernel_op_108*tmp_kernel_op_175)*(tmp_kernel_op_105*tmp_kernel_op_174 + tmp_kernel_op_108*tmp_kernel_op_175)) + ((tmp_kernel_op_107*tmp_kernel_op_175 + tmp_kernel_op_116*tmp_kernel_op_174)*(tmp_kernel_op_107*tmp_kernel_op_175 + tmp_kernel_op_116*tmp_kernel_op_174))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_177 + tmp_kernel_op_144*tmp_kernel_op_178)*(tmp_kernel_op_141*tmp_kernel_op_177 + tmp_kernel_op_144*tmp_kernel_op_178)) + ((tmp_kernel_op_143*tmp_kernel_op_178 + tmp_kernel_op_152*tmp_kernel_op_177)*(tmp_kernel_op_143*tmp_kernel_op_178 + tmp_kernel_op_152*tmp_kernel_op_177))) + tmp_kernel_op_58*(((tmp_kernel_op_168*tmp_kernel_op_29 + tmp_kernel_op_169*tmp_kernel_op_32)*(tmp_kernel_op_168*tmp_kernel_op_29 + tmp_kernel_op_169*tmp_kernel_op_32)) + ((tmp_kernel_op_168*tmp_kernel_op_40 + tmp_kernel_op_169*tmp_kernel_op_31)*(tmp_kernel_op_168*tmp_kernel_op_40 + tmp_kernel_op_169*tmp_kernel_op_31))) + tmp_kernel_op_94*(((tmp_kernel_op_171*tmp_kernel_op_69 + tmp_kernel_op_172*tmp_kernel_op_72)*(tmp_kernel_op_171*tmp_kernel_op_69 + tmp_kernel_op_172*tmp_kernel_op_72)) + ((tmp_kernel_op_171*tmp_kernel_op_80 + tmp_kernel_op_172*tmp_kernel_op_71)*(tmp_kernel_op_171*tmp_kernel_op_80 + tmp_kernel_op_172*tmp_kernel_op_71)));
+             const real_t elMatDiag_2 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_186 + tmp_kernel_op_108*tmp_kernel_op_187)*(tmp_kernel_op_105*tmp_kernel_op_186 + tmp_kernel_op_108*tmp_kernel_op_187)) + ((tmp_kernel_op_107*tmp_kernel_op_187 + tmp_kernel_op_116*tmp_kernel_op_186)*(tmp_kernel_op_107*tmp_kernel_op_187 + tmp_kernel_op_116*tmp_kernel_op_186))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_189 + tmp_kernel_op_144*tmp_kernel_op_190)*(tmp_kernel_op_141*tmp_kernel_op_189 + tmp_kernel_op_144*tmp_kernel_op_190)) + ((tmp_kernel_op_143*tmp_kernel_op_190 + tmp_kernel_op_152*tmp_kernel_op_189)*(tmp_kernel_op_143*tmp_kernel_op_190 + tmp_kernel_op_152*tmp_kernel_op_189))) + tmp_kernel_op_58*(((tmp_kernel_op_180*tmp_kernel_op_29 + tmp_kernel_op_181*tmp_kernel_op_32)*(tmp_kernel_op_180*tmp_kernel_op_29 + tmp_kernel_op_181*tmp_kernel_op_32)) + ((tmp_kernel_op_180*tmp_kernel_op_40 + tmp_kernel_op_181*tmp_kernel_op_31)*(tmp_kernel_op_180*tmp_kernel_op_40 + tmp_kernel_op_181*tmp_kernel_op_31))) + tmp_kernel_op_94*(((tmp_kernel_op_183*tmp_kernel_op_69 + tmp_kernel_op_184*tmp_kernel_op_72)*(tmp_kernel_op_183*tmp_kernel_op_69 + tmp_kernel_op_184*tmp_kernel_op_72)) + ((tmp_kernel_op_183*tmp_kernel_op_80 + tmp_kernel_op_184*tmp_kernel_op_71)*(tmp_kernel_op_183*tmp_kernel_op_80 + tmp_kernel_op_184*tmp_kernel_op_71)));
+             const real_t elMatDiag_3 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_205 + tmp_kernel_op_108*tmp_kernel_op_208)*(tmp_kernel_op_105*tmp_kernel_op_205 + tmp_kernel_op_108*tmp_kernel_op_208)) + ((tmp_kernel_op_107*tmp_kernel_op_208 + tmp_kernel_op_116*tmp_kernel_op_205)*(tmp_kernel_op_107*tmp_kernel_op_208 + tmp_kernel_op_116*tmp_kernel_op_205))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_211 + tmp_kernel_op_144*tmp_kernel_op_214)*(tmp_kernel_op_141*tmp_kernel_op_211 + tmp_kernel_op_144*tmp_kernel_op_214)) + ((tmp_kernel_op_143*tmp_kernel_op_214 + tmp_kernel_op_152*tmp_kernel_op_211)*(tmp_kernel_op_143*tmp_kernel_op_214 + tmp_kernel_op_152*tmp_kernel_op_211))) + tmp_kernel_op_58*(((tmp_kernel_op_193*tmp_kernel_op_29 + tmp_kernel_op_196*tmp_kernel_op_32)*(tmp_kernel_op_193*tmp_kernel_op_29 + tmp_kernel_op_196*tmp_kernel_op_32)) + ((tmp_kernel_op_193*tmp_kernel_op_40 + tmp_kernel_op_196*tmp_kernel_op_31)*(tmp_kernel_op_193*tmp_kernel_op_40 + tmp_kernel_op_196*tmp_kernel_op_31))) + tmp_kernel_op_94*(((tmp_kernel_op_199*tmp_kernel_op_69 + tmp_kernel_op_202*tmp_kernel_op_72)*(tmp_kernel_op_199*tmp_kernel_op_69 + tmp_kernel_op_202*tmp_kernel_op_72)) + ((tmp_kernel_op_199*tmp_kernel_op_80 + tmp_kernel_op_202*tmp_kernel_op_71)*(tmp_kernel_op_199*tmp_kernel_op_80 + tmp_kernel_op_202*tmp_kernel_op_71)));
+             const real_t elMatDiag_4 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_222 + tmp_kernel_op_108*tmp_kernel_op_223)*(tmp_kernel_op_105*tmp_kernel_op_222 + tmp_kernel_op_108*tmp_kernel_op_223)) + ((tmp_kernel_op_107*tmp_kernel_op_223 + tmp_kernel_op_116*tmp_kernel_op_222)*(tmp_kernel_op_107*tmp_kernel_op_223 + tmp_kernel_op_116*tmp_kernel_op_222))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_225 + tmp_kernel_op_144*tmp_kernel_op_226)*(tmp_kernel_op_141*tmp_kernel_op_225 + tmp_kernel_op_144*tmp_kernel_op_226)) + ((tmp_kernel_op_143*tmp_kernel_op_226 + tmp_kernel_op_152*tmp_kernel_op_225)*(tmp_kernel_op_143*tmp_kernel_op_226 + tmp_kernel_op_152*tmp_kernel_op_225))) + tmp_kernel_op_58*(((tmp_kernel_op_216*tmp_kernel_op_29 + tmp_kernel_op_217*tmp_kernel_op_32)*(tmp_kernel_op_216*tmp_kernel_op_29 + tmp_kernel_op_217*tmp_kernel_op_32)) + ((tmp_kernel_op_216*tmp_kernel_op_40 + tmp_kernel_op_217*tmp_kernel_op_31)*(tmp_kernel_op_216*tmp_kernel_op_40 + tmp_kernel_op_217*tmp_kernel_op_31))) + tmp_kernel_op_94*(((tmp_kernel_op_219*tmp_kernel_op_69 + tmp_kernel_op_220*tmp_kernel_op_72)*(tmp_kernel_op_219*tmp_kernel_op_69 + tmp_kernel_op_220*tmp_kernel_op_72)) + ((tmp_kernel_op_219*tmp_kernel_op_80 + tmp_kernel_op_220*tmp_kernel_op_71)*(tmp_kernel_op_219*tmp_kernel_op_80 + tmp_kernel_op_220*tmp_kernel_op_71)));
+             const real_t elMatDiag_5 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_234 + tmp_kernel_op_108*tmp_kernel_op_235)*(tmp_kernel_op_105*tmp_kernel_op_234 + tmp_kernel_op_108*tmp_kernel_op_235)) + ((tmp_kernel_op_107*tmp_kernel_op_235 + tmp_kernel_op_116*tmp_kernel_op_234)*(tmp_kernel_op_107*tmp_kernel_op_235 + tmp_kernel_op_116*tmp_kernel_op_234))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_237 + tmp_kernel_op_144*tmp_kernel_op_238)*(tmp_kernel_op_141*tmp_kernel_op_237 + tmp_kernel_op_144*tmp_kernel_op_238)) + ((tmp_kernel_op_143*tmp_kernel_op_238 + tmp_kernel_op_152*tmp_kernel_op_237)*(tmp_kernel_op_143*tmp_kernel_op_238 + tmp_kernel_op_152*tmp_kernel_op_237))) + tmp_kernel_op_58*(((tmp_kernel_op_228*tmp_kernel_op_29 + tmp_kernel_op_229*tmp_kernel_op_32)*(tmp_kernel_op_228*tmp_kernel_op_29 + tmp_kernel_op_229*tmp_kernel_op_32)) + ((tmp_kernel_op_228*tmp_kernel_op_40 + tmp_kernel_op_229*tmp_kernel_op_31)*(tmp_kernel_op_228*tmp_kernel_op_40 + tmp_kernel_op_229*tmp_kernel_op_31))) + tmp_kernel_op_94*(((tmp_kernel_op_231*tmp_kernel_op_69 + tmp_kernel_op_232*tmp_kernel_op_72)*(tmp_kernel_op_231*tmp_kernel_op_69 + tmp_kernel_op_232*tmp_kernel_op_72)) + ((tmp_kernel_op_231*tmp_kernel_op_80 + tmp_kernel_op_232*tmp_kernel_op_71)*(tmp_kernel_op_231*tmp_kernel_op_80 + tmp_kernel_op_232*tmp_kernel_op_71)));
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // BLUE setup: reciprocal of micro_edges_per_macro_edge_float, used to scale the macro-vertex differences below
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); // component 0 of: macro vertex 0 + scale*(macro vertex 1 - macro vertex 0)
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); // component 1 of the same point
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); // component 0 of: scale*(macro vertex 2 - macro vertex 0)
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); // component 1 of the same offset
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; // p_affine_const_<point>_<component>_BLUE: point 0 = v0 + scale*(v1 - v0)
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; // point 1 = v0 + scale*(v2 - v0)
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; // point 2 = v0 + scale*(v1 - v0) + scale*(v2 - v0)
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; // 2x2 matrix whose first column is (point 1 - point 0) ...
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; // ... and whose second column is (point 2 - point 0)
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; // determinant of jac_affine_*_BLUE
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); // reciprocal determinant; no guard against a zero determinant here
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; // inverse of the 2x2 matrix: swapped diagonal and negated off-diagonal entries, each times 1/det
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); // |det|; NOTE(review): relies on a floating-point abs overload being in scope (includes are outside this excerpt) - confirm
+       const real_t tmp_moved_constant_0 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_36; // tmp_moved_constant_*: products of BLUE inverse-matrix entries with tmp_kernel_op_* values defined outside this excerpt, computed once before the BLUE loop; presumably hoisted by the "moveconstants" optimization and consumed inside that loop - confirm against the generator
+       const real_t tmp_moved_constant_1 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_35;
+       const real_t tmp_moved_constant_2 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_36;
+       const real_t tmp_moved_constant_3 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_35;
+       const real_t tmp_moved_constant_4 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_76; // same four products as constants 0-3, with tmp_kernel_op_76 / tmp_kernel_op_75
+       const real_t tmp_moved_constant_5 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_75;
+       const real_t tmp_moved_constant_6 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_76;
+       const real_t tmp_moved_constant_7 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_75;
+       const real_t tmp_moved_constant_8 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_112; // same pattern with tmp_kernel_op_112 / tmp_kernel_op_111
+       const real_t tmp_moved_constant_9 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_111;
+       const real_t tmp_moved_constant_10 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_112;
+       const real_t tmp_moved_constant_11 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_111;
+       const real_t tmp_moved_constant_12 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_148; // same pattern with tmp_kernel_op_148 / tmp_kernel_op_147
+       const real_t tmp_moved_constant_13 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_147;
+       const real_t tmp_moved_constant_14 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_148;
+       const real_t tmp_moved_constant_15 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_147;
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t tmp_kernel_op_2 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_kernel_op_3 = -tmp_kernel_op_2;
+             const real_t tmp_kernel_op_4 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_kernel_op_5 = -tmp_kernel_op_4;
+             const real_t tmp_kernel_op_6 = p_affine_0_0 + tmp_kernel_op_3*0.33333333333333331 + tmp_kernel_op_5*0.33333333333333331;
+             const real_t tmp_kernel_op_7 = (tmp_kernel_op_6*tmp_kernel_op_6);
+             const real_t tmp_kernel_op_8 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_kernel_op_9 = -tmp_kernel_op_8;
+             const real_t tmp_kernel_op_10 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_kernel_op_11 = -tmp_kernel_op_10;
+             const real_t tmp_kernel_op_12 = p_affine_0_1 + tmp_kernel_op_11*0.33333333333333331 + tmp_kernel_op_9*0.33333333333333331;
+             const real_t tmp_kernel_op_13 = (tmp_kernel_op_12*tmp_kernel_op_12);
+             const real_t tmp_kernel_op_14 = tmp_kernel_op_13 + tmp_kernel_op_7;
+             const real_t tmp_kernel_op_22 = pow(tmp_kernel_op_14, -0.50000000000000000)*tmp_kernel_op_21;
+             const real_t tmp_kernel_op_23 = tmp_kernel_op_22*tmp_kernel_op_6;
+             const real_t tmp_kernel_op_24 = pow(tmp_kernel_op_14, -1.5000000000000000);
+             const real_t tmp_kernel_op_27 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_6) - tmp_kernel_op_17*(tmp_kernel_op_12 + tmp_kernel_op_25));
+             const real_t tmp_kernel_op_28 = tmp_kernel_op_24*tmp_kernel_op_27*1.0;
+             const real_t tmp_kernel_op_29 = tmp_kernel_op_1*tmp_kernel_op_23 + tmp_kernel_op_13*tmp_kernel_op_28;
+             const real_t tmp_kernel_op_30 = tmp_kernel_op_12*tmp_kernel_op_22;
+             const real_t tmp_kernel_op_31 = -tmp_kernel_op_17*tmp_kernel_op_30 + tmp_kernel_op_24*tmp_kernel_op_27*tmp_kernel_op_7*1.0;
+             const real_t tmp_kernel_op_32 = tmp_kernel_op_12*tmp_kernel_op_24*tmp_kernel_op_27*tmp_kernel_op_6*1.0 + tmp_kernel_op_17*tmp_kernel_op_23;
+             const real_t tmp_kernel_op_33 = tmp_kernel_op_1*tmp_kernel_op_30 - tmp_kernel_op_12*tmp_kernel_op_28*tmp_kernel_op_6;
+             const real_t tmp_kernel_op_34 = 1.0 / (tmp_kernel_op_29*tmp_kernel_op_31 + tmp_kernel_op_32*tmp_kernel_op_33);
+             const real_t tmp_kernel_op_38 = tmp_kernel_op_34*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_37 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_37);
+             const real_t tmp_kernel_op_39 = tmp_kernel_op_34*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_37 + jac_affine_inv_1_0_BLUE*tmp_kernel_op_37);
+             const real_t tmp_kernel_op_40 = -tmp_kernel_op_33;
+             const real_t tmp_kernel_op_41 = -p_affine_0_0;
+             const real_t tmp_kernel_op_42 = tmp_kernel_op_2*0.33333333333333331 + tmp_kernel_op_4*0.33333333333333331 + tmp_kernel_op_41;
+             const real_t tmp_kernel_op_43 = (tmp_kernel_op_42*tmp_kernel_op_42);
+             const real_t tmp_kernel_op_44 = -p_affine_0_1;
+             const real_t tmp_kernel_op_45 = tmp_kernel_op_10*0.33333333333333331 + tmp_kernel_op_44 + tmp_kernel_op_8*0.33333333333333331;
+             const real_t tmp_kernel_op_46 = (tmp_kernel_op_45*tmp_kernel_op_45);
+             const real_t tmp_kernel_op_47 = tmp_kernel_op_43 + tmp_kernel_op_46;
+             const real_t tmp_kernel_op_50 = pow(tmp_kernel_op_47, -0.50000000000000000)*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_51 = tmp_kernel_op_42*tmp_kernel_op_50;
+             const real_t tmp_kernel_op_52 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_42) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_45);
+             const real_t tmp_kernel_op_53 = pow(tmp_kernel_op_47, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_54 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+             const real_t tmp_kernel_op_55 = tmp_kernel_op_45*tmp_kernel_op_50;
+             const real_t tmp_kernel_op_56 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+             const real_t tmp_kernel_op_57 = tmp_kernel_op_42*tmp_kernel_op_45;
+             const real_t tmp_kernel_op_58 = abs_det_jac_affine_BLUE*-0.28125*abs((tmp_kernel_op_0*tmp_kernel_op_51 - tmp_kernel_op_46*tmp_kernel_op_54)*(tmp_kernel_op_16*tmp_kernel_op_55 + tmp_kernel_op_43*tmp_kernel_op_56) - (tmp_kernel_op_0*tmp_kernel_op_55 + tmp_kernel_op_54*tmp_kernel_op_57)*(tmp_kernel_op_16*tmp_kernel_op_51 - tmp_kernel_op_56*tmp_kernel_op_57));
+             const real_t tmp_kernel_op_59 = p_affine_0_0 + tmp_kernel_op_3*0.20000000000000001 + tmp_kernel_op_5*0.59999999999999998;
+             const real_t tmp_kernel_op_60 = (tmp_kernel_op_59*tmp_kernel_op_59);
+             const real_t tmp_kernel_op_61 = p_affine_0_1 + tmp_kernel_op_11*0.59999999999999998 + tmp_kernel_op_9*0.20000000000000001;
+             const real_t tmp_kernel_op_62 = (tmp_kernel_op_61*tmp_kernel_op_61);
+             const real_t tmp_kernel_op_63 = tmp_kernel_op_60 + tmp_kernel_op_62;
+             const real_t tmp_kernel_op_64 = tmp_kernel_op_21*pow(tmp_kernel_op_63, -0.50000000000000000);
+             const real_t tmp_kernel_op_65 = tmp_kernel_op_59*tmp_kernel_op_64;
+             const real_t tmp_kernel_op_66 = pow(tmp_kernel_op_63, -1.5000000000000000);
+             const real_t tmp_kernel_op_67 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_59) - tmp_kernel_op_17*(tmp_kernel_op_25 + tmp_kernel_op_61));
+             const real_t tmp_kernel_op_68 = tmp_kernel_op_66*tmp_kernel_op_67*1.0;
+             const real_t tmp_kernel_op_69 = tmp_kernel_op_1*tmp_kernel_op_65 + tmp_kernel_op_62*tmp_kernel_op_68;
+             const real_t tmp_kernel_op_70 = tmp_kernel_op_61*tmp_kernel_op_64;
+             const real_t tmp_kernel_op_71 = -tmp_kernel_op_17*tmp_kernel_op_70 + tmp_kernel_op_60*tmp_kernel_op_66*tmp_kernel_op_67*1.0;
+             const real_t tmp_kernel_op_72 = tmp_kernel_op_17*tmp_kernel_op_65 + tmp_kernel_op_59*tmp_kernel_op_61*tmp_kernel_op_66*tmp_kernel_op_67*1.0;
+             const real_t tmp_kernel_op_73 = tmp_kernel_op_1*tmp_kernel_op_70 - tmp_kernel_op_59*tmp_kernel_op_61*tmp_kernel_op_68;
+             const real_t tmp_kernel_op_74 = 1.0 / (tmp_kernel_op_69*tmp_kernel_op_71 + tmp_kernel_op_72*tmp_kernel_op_73);
+             const real_t tmp_kernel_op_78 = tmp_kernel_op_74*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_77 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_77);
+             const real_t tmp_kernel_op_79 = tmp_kernel_op_74*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_77 + jac_affine_inv_1_0_BLUE*tmp_kernel_op_77);
+             const real_t tmp_kernel_op_80 = -tmp_kernel_op_73;
+             const real_t tmp_kernel_op_81 = tmp_kernel_op_2*0.20000000000000001 + tmp_kernel_op_4*0.59999999999999998 + tmp_kernel_op_41;
+             const real_t tmp_kernel_op_82 = (tmp_kernel_op_81*tmp_kernel_op_81);
+             const real_t tmp_kernel_op_83 = tmp_kernel_op_10*0.59999999999999998 + tmp_kernel_op_44 + tmp_kernel_op_8*0.20000000000000001;
+             const real_t tmp_kernel_op_84 = (tmp_kernel_op_83*tmp_kernel_op_83);
+             const real_t tmp_kernel_op_85 = tmp_kernel_op_82 + tmp_kernel_op_84;
+             const real_t tmp_kernel_op_86 = tmp_kernel_op_49*pow(tmp_kernel_op_85, -0.50000000000000000);
+             const real_t tmp_kernel_op_87 = tmp_kernel_op_81*tmp_kernel_op_86;
+             const real_t tmp_kernel_op_88 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_81) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_83);
+             const real_t tmp_kernel_op_89 = pow(tmp_kernel_op_85, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_90 = tmp_kernel_op_89*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_88);
+             const real_t tmp_kernel_op_91 = tmp_kernel_op_83*tmp_kernel_op_86;
+             const real_t tmp_kernel_op_92 = tmp_kernel_op_89*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_88);
+             const real_t tmp_kernel_op_93 = tmp_kernel_op_81*tmp_kernel_op_83;
+             const real_t tmp_kernel_op_94 = abs_det_jac_affine_BLUE*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_87 - tmp_kernel_op_84*tmp_kernel_op_90)*(tmp_kernel_op_16*tmp_kernel_op_91 + tmp_kernel_op_82*tmp_kernel_op_92) - (tmp_kernel_op_0*tmp_kernel_op_91 + tmp_kernel_op_90*tmp_kernel_op_93)*(tmp_kernel_op_16*tmp_kernel_op_87 - tmp_kernel_op_92*tmp_kernel_op_93));
+             const real_t tmp_kernel_op_95 = p_affine_0_0 + tmp_kernel_op_3*0.59999999999999998 + tmp_kernel_op_5*0.20000000000000001;
+             const real_t tmp_kernel_op_96 = (tmp_kernel_op_95*tmp_kernel_op_95);
+             const real_t tmp_kernel_op_97 = p_affine_0_1 + tmp_kernel_op_11*0.20000000000000001 + tmp_kernel_op_9*0.59999999999999998;
+             const real_t tmp_kernel_op_98 = (tmp_kernel_op_97*tmp_kernel_op_97);
+             const real_t tmp_kernel_op_99 = tmp_kernel_op_96 + tmp_kernel_op_98;
+             const real_t tmp_kernel_op_100 = tmp_kernel_op_21*pow(tmp_kernel_op_99, -0.50000000000000000);
+             const real_t tmp_kernel_op_101 = tmp_kernel_op_100*tmp_kernel_op_95;
+             const real_t tmp_kernel_op_102 = pow(tmp_kernel_op_99, -1.5000000000000000);
+             const real_t tmp_kernel_op_103 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_95) - tmp_kernel_op_17*(tmp_kernel_op_25 + tmp_kernel_op_97));
+             const real_t tmp_kernel_op_104 = tmp_kernel_op_102*tmp_kernel_op_103*1.0;
+             const real_t tmp_kernel_op_105 = tmp_kernel_op_1*tmp_kernel_op_101 + tmp_kernel_op_104*tmp_kernel_op_98;
+             const real_t tmp_kernel_op_106 = tmp_kernel_op_100*tmp_kernel_op_97;
+             const real_t tmp_kernel_op_107 = tmp_kernel_op_102*tmp_kernel_op_103*tmp_kernel_op_96*1.0 - tmp_kernel_op_106*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_108 = tmp_kernel_op_101*tmp_kernel_op_17 + tmp_kernel_op_102*tmp_kernel_op_103*tmp_kernel_op_95*tmp_kernel_op_97*1.0;
+             const real_t tmp_kernel_op_109 = tmp_kernel_op_1*tmp_kernel_op_106 - tmp_kernel_op_104*tmp_kernel_op_95*tmp_kernel_op_97;
+             const real_t tmp_kernel_op_110 = 1.0 / (tmp_kernel_op_105*tmp_kernel_op_107 + tmp_kernel_op_108*tmp_kernel_op_109);
+             const real_t tmp_kernel_op_114 = tmp_kernel_op_110*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_113 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_113);
+             const real_t tmp_kernel_op_115 = tmp_kernel_op_110*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_113 + jac_affine_inv_1_0_BLUE*tmp_kernel_op_113);
+             const real_t tmp_kernel_op_116 = -tmp_kernel_op_109;
+             const real_t tmp_kernel_op_117 = tmp_kernel_op_2*0.59999999999999998 + tmp_kernel_op_4*0.20000000000000001 + tmp_kernel_op_41;
+             const real_t tmp_kernel_op_118 = (tmp_kernel_op_117*tmp_kernel_op_117);
+             const real_t tmp_kernel_op_119 = tmp_kernel_op_10*0.20000000000000001 + tmp_kernel_op_44 + tmp_kernel_op_8*0.59999999999999998;
+             const real_t tmp_kernel_op_120 = (tmp_kernel_op_119*tmp_kernel_op_119);
+             const real_t tmp_kernel_op_121 = tmp_kernel_op_118 + tmp_kernel_op_120;
+             const real_t tmp_kernel_op_122 = pow(tmp_kernel_op_121, -0.50000000000000000)*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_123 = tmp_kernel_op_117*tmp_kernel_op_122;
+             const real_t tmp_kernel_op_124 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_117) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_119);
+             const real_t tmp_kernel_op_125 = pow(tmp_kernel_op_121, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_126 = tmp_kernel_op_125*(radRayVertex + tmp_kernel_op_124*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_127 = tmp_kernel_op_119*tmp_kernel_op_122;
+             const real_t tmp_kernel_op_128 = tmp_kernel_op_125*(radRayVertex + tmp_kernel_op_124*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_129 = tmp_kernel_op_117*tmp_kernel_op_119;
+             const real_t tmp_kernel_op_130 = abs_det_jac_affine_BLUE*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_123 - tmp_kernel_op_120*tmp_kernel_op_126)*(tmp_kernel_op_118*tmp_kernel_op_128 + tmp_kernel_op_127*tmp_kernel_op_16) - (tmp_kernel_op_0*tmp_kernel_op_127 + tmp_kernel_op_126*tmp_kernel_op_129)*(tmp_kernel_op_123*tmp_kernel_op_16 - tmp_kernel_op_128*tmp_kernel_op_129));
+             const real_t tmp_kernel_op_131 = p_affine_0_0 + tmp_kernel_op_3*0.20000000000000001 + tmp_kernel_op_5*0.20000000000000001;
+             const real_t tmp_kernel_op_132 = (tmp_kernel_op_131*tmp_kernel_op_131);
+             const real_t tmp_kernel_op_133 = p_affine_0_1 + tmp_kernel_op_11*0.20000000000000001 + tmp_kernel_op_9*0.20000000000000001;
+             const real_t tmp_kernel_op_134 = (tmp_kernel_op_133*tmp_kernel_op_133);
+             const real_t tmp_kernel_op_135 = tmp_kernel_op_132 + tmp_kernel_op_134;
+             const real_t tmp_kernel_op_136 = pow(tmp_kernel_op_135, -0.50000000000000000)*tmp_kernel_op_21;
+             const real_t tmp_kernel_op_137 = tmp_kernel_op_131*tmp_kernel_op_136;
+             const real_t tmp_kernel_op_138 = pow(tmp_kernel_op_135, -1.5000000000000000);
+             const real_t tmp_kernel_op_139 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_131 + tmp_kernel_op_26) - tmp_kernel_op_17*(tmp_kernel_op_133 + tmp_kernel_op_25));
+             const real_t tmp_kernel_op_140 = tmp_kernel_op_138*tmp_kernel_op_139*1.0;
+             const real_t tmp_kernel_op_141 = tmp_kernel_op_1*tmp_kernel_op_137 + tmp_kernel_op_134*tmp_kernel_op_140;
+             const real_t tmp_kernel_op_142 = tmp_kernel_op_133*tmp_kernel_op_136;
+             const real_t tmp_kernel_op_143 = tmp_kernel_op_132*tmp_kernel_op_138*tmp_kernel_op_139*1.0 - tmp_kernel_op_142*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_144 = tmp_kernel_op_131*tmp_kernel_op_133*tmp_kernel_op_138*tmp_kernel_op_139*1.0 + tmp_kernel_op_137*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_145 = tmp_kernel_op_1*tmp_kernel_op_142 - tmp_kernel_op_131*tmp_kernel_op_133*tmp_kernel_op_140;
+             const real_t tmp_kernel_op_146 = 1.0 / (tmp_kernel_op_141*tmp_kernel_op_143 + tmp_kernel_op_144*tmp_kernel_op_145);
+             const real_t tmp_kernel_op_150 = tmp_kernel_op_146*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_149 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_149);
+             const real_t tmp_kernel_op_151 = tmp_kernel_op_146*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_149 + jac_affine_inv_1_0_BLUE*tmp_kernel_op_149);
+             const real_t tmp_kernel_op_152 = -tmp_kernel_op_145;
+             const real_t tmp_kernel_op_153 = tmp_kernel_op_2*0.20000000000000001 + tmp_kernel_op_4*0.20000000000000001 + tmp_kernel_op_41;
+             const real_t tmp_kernel_op_154 = (tmp_kernel_op_153*tmp_kernel_op_153);
+             const real_t tmp_kernel_op_155 = tmp_kernel_op_10*0.20000000000000001 + tmp_kernel_op_44 + tmp_kernel_op_8*0.20000000000000001;
+             const real_t tmp_kernel_op_156 = (tmp_kernel_op_155*tmp_kernel_op_155);
+             const real_t tmp_kernel_op_157 = tmp_kernel_op_154 + tmp_kernel_op_156;
+             const real_t tmp_kernel_op_158 = pow(tmp_kernel_op_157, -0.50000000000000000)*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_159 = tmp_kernel_op_153*tmp_kernel_op_158;
+             const real_t tmp_kernel_op_160 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_153) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_155);
+             const real_t tmp_kernel_op_161 = pow(tmp_kernel_op_157, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_162 = tmp_kernel_op_161*(radRayVertex + tmp_kernel_op_160*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_163 = tmp_kernel_op_155*tmp_kernel_op_158;
+             const real_t tmp_kernel_op_164 = tmp_kernel_op_161*(radRayVertex + tmp_kernel_op_160*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_165 = tmp_kernel_op_153*tmp_kernel_op_155;
+             const real_t tmp_kernel_op_166 = abs_det_jac_affine_BLUE*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_159 - tmp_kernel_op_156*tmp_kernel_op_162)*(tmp_kernel_op_154*tmp_kernel_op_164 + tmp_kernel_op_16*tmp_kernel_op_163) - (tmp_kernel_op_0*tmp_kernel_op_163 + tmp_kernel_op_162*tmp_kernel_op_165)*(tmp_kernel_op_159*tmp_kernel_op_16 - tmp_kernel_op_164*tmp_kernel_op_165));
+             const real_t tmp_kernel_op_167 = tmp_kernel_op_34*(tmp_kernel_op_35 - 1.0);
+             const real_t tmp_kernel_op_168 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_167;
+             const real_t tmp_kernel_op_169 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_167;
+             const real_t tmp_kernel_op_170 = tmp_kernel_op_74*(tmp_kernel_op_75 - 1.0);
+             const real_t tmp_kernel_op_171 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_170;
+             const real_t tmp_kernel_op_172 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_170;
+             const real_t tmp_kernel_op_173 = tmp_kernel_op_110*(tmp_kernel_op_111 - 1.0);
+             const real_t tmp_kernel_op_174 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_173;
+             const real_t tmp_kernel_op_175 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_173;
+             const real_t tmp_kernel_op_176 = tmp_kernel_op_146*(tmp_kernel_op_147 - 1.0);
+             const real_t tmp_kernel_op_177 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_176;
+             const real_t tmp_kernel_op_178 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_176;
+             const real_t tmp_kernel_op_179 = tmp_kernel_op_34*(tmp_kernel_op_36 - 1.0);
+             const real_t tmp_kernel_op_180 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_179;
+             const real_t tmp_kernel_op_181 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_179;
+             const real_t tmp_kernel_op_182 = tmp_kernel_op_74*(tmp_kernel_op_76 - 1.0);
+             const real_t tmp_kernel_op_183 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_182;
+             const real_t tmp_kernel_op_184 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_182;
+             const real_t tmp_kernel_op_185 = tmp_kernel_op_110*(tmp_kernel_op_112 - 1.0);
+             const real_t tmp_kernel_op_186 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_185;
+             const real_t tmp_kernel_op_187 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_185;
+             const real_t tmp_kernel_op_188 = tmp_kernel_op_146*(tmp_kernel_op_148 - 1.0);
+             const real_t tmp_kernel_op_189 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_188;
+             const real_t tmp_kernel_op_190 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_188;
+             const real_t tmp_kernel_op_193 = tmp_kernel_op_34*(tmp_moved_constant_0 + tmp_moved_constant_1);
+             const real_t tmp_kernel_op_196 = tmp_kernel_op_34*(tmp_moved_constant_2 + tmp_moved_constant_3);
+             const real_t tmp_kernel_op_199 = tmp_kernel_op_74*(tmp_moved_constant_4 + tmp_moved_constant_5);
+             const real_t tmp_kernel_op_202 = tmp_kernel_op_74*(tmp_moved_constant_6 + tmp_moved_constant_7);
+             const real_t tmp_kernel_op_205 = tmp_kernel_op_110*(tmp_moved_constant_8 + tmp_moved_constant_9);
+             const real_t tmp_kernel_op_208 = tmp_kernel_op_110*(tmp_moved_constant_10 + tmp_moved_constant_11);
+             const real_t tmp_kernel_op_211 = tmp_kernel_op_146*(tmp_moved_constant_12 + tmp_moved_constant_13);
+             const real_t tmp_kernel_op_214 = tmp_kernel_op_146*(tmp_moved_constant_14 + tmp_moved_constant_15);
+             const real_t tmp_kernel_op_216 = tmp_kernel_op_34*(jac_affine_inv_1_1_BLUE*tmp_kernel_op_215 - tmp_moved_constant_0);
+             const real_t tmp_kernel_op_217 = tmp_kernel_op_34*(jac_affine_inv_1_0_BLUE*tmp_kernel_op_215 - tmp_moved_constant_2);
+             const real_t tmp_kernel_op_219 = tmp_kernel_op_74*(jac_affine_inv_1_1_BLUE*tmp_kernel_op_218 - tmp_moved_constant_4);
+             const real_t tmp_kernel_op_220 = tmp_kernel_op_74*(jac_affine_inv_1_0_BLUE*tmp_kernel_op_218 - tmp_moved_constant_6);
+             const real_t tmp_kernel_op_222 = tmp_kernel_op_110*(jac_affine_inv_1_1_BLUE*tmp_kernel_op_221 - tmp_moved_constant_8);
+             const real_t tmp_kernel_op_223 = tmp_kernel_op_110*(jac_affine_inv_1_0_BLUE*tmp_kernel_op_221 - tmp_moved_constant_10);
+             const real_t tmp_kernel_op_225 = tmp_kernel_op_146*(jac_affine_inv_1_1_BLUE*tmp_kernel_op_224 - tmp_moved_constant_12);
+             const real_t tmp_kernel_op_226 = tmp_kernel_op_146*(jac_affine_inv_1_0_BLUE*tmp_kernel_op_224 - tmp_moved_constant_14);
+             const real_t tmp_kernel_op_228 = tmp_kernel_op_34*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_227 - tmp_moved_constant_1);
+             const real_t tmp_kernel_op_229 = tmp_kernel_op_34*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_227 - tmp_moved_constant_3);
+             const real_t tmp_kernel_op_231 = tmp_kernel_op_74*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_230 - tmp_moved_constant_5);
+             const real_t tmp_kernel_op_232 = tmp_kernel_op_74*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_230 - tmp_moved_constant_7);
+             const real_t tmp_kernel_op_234 = tmp_kernel_op_110*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_233 - tmp_moved_constant_9);
+             const real_t tmp_kernel_op_235 = tmp_kernel_op_110*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_233 - tmp_moved_constant_11);
+             const real_t tmp_kernel_op_237 = tmp_kernel_op_146*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_236 - tmp_moved_constant_13);
+             const real_t tmp_kernel_op_238 = tmp_kernel_op_146*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_236 - tmp_moved_constant_15);
+             const real_t elMatDiag_0 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_114 + tmp_kernel_op_108*tmp_kernel_op_115)*(tmp_kernel_op_105*tmp_kernel_op_114 + tmp_kernel_op_108*tmp_kernel_op_115)) + ((tmp_kernel_op_107*tmp_kernel_op_115 + tmp_kernel_op_114*tmp_kernel_op_116)*(tmp_kernel_op_107*tmp_kernel_op_115 + tmp_kernel_op_114*tmp_kernel_op_116))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_150 + tmp_kernel_op_144*tmp_kernel_op_151)*(tmp_kernel_op_141*tmp_kernel_op_150 + tmp_kernel_op_144*tmp_kernel_op_151)) + ((tmp_kernel_op_143*tmp_kernel_op_151 + tmp_kernel_op_150*tmp_kernel_op_152)*(tmp_kernel_op_143*tmp_kernel_op_151 + tmp_kernel_op_150*tmp_kernel_op_152))) + tmp_kernel_op_58*(((tmp_kernel_op_29*tmp_kernel_op_38 + tmp_kernel_op_32*tmp_kernel_op_39)*(tmp_kernel_op_29*tmp_kernel_op_38 + tmp_kernel_op_32*tmp_kernel_op_39)) + ((tmp_kernel_op_31*tmp_kernel_op_39 + tmp_kernel_op_38*tmp_kernel_op_40)*(tmp_kernel_op_31*tmp_kernel_op_39 + tmp_kernel_op_38*tmp_kernel_op_40))) + tmp_kernel_op_94*(((tmp_kernel_op_69*tmp_kernel_op_78 + tmp_kernel_op_72*tmp_kernel_op_79)*(tmp_kernel_op_69*tmp_kernel_op_78 + tmp_kernel_op_72*tmp_kernel_op_79)) + ((tmp_kernel_op_71*tmp_kernel_op_79 + tmp_kernel_op_78*tmp_kernel_op_80)*(tmp_kernel_op_71*tmp_kernel_op_79 + tmp_kernel_op_78*tmp_kernel_op_80)));
+             const real_t elMatDiag_1 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_174 + tmp_kernel_op_108*tmp_kernel_op_175)*(tmp_kernel_op_105*tmp_kernel_op_174 + tmp_kernel_op_108*tmp_kernel_op_175)) + ((tmp_kernel_op_107*tmp_kernel_op_175 + tmp_kernel_op_116*tmp_kernel_op_174)*(tmp_kernel_op_107*tmp_kernel_op_175 + tmp_kernel_op_116*tmp_kernel_op_174))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_177 + tmp_kernel_op_144*tmp_kernel_op_178)*(tmp_kernel_op_141*tmp_kernel_op_177 + tmp_kernel_op_144*tmp_kernel_op_178)) + ((tmp_kernel_op_143*tmp_kernel_op_178 + tmp_kernel_op_152*tmp_kernel_op_177)*(tmp_kernel_op_143*tmp_kernel_op_178 + tmp_kernel_op_152*tmp_kernel_op_177))) + tmp_kernel_op_58*(((tmp_kernel_op_168*tmp_kernel_op_29 + tmp_kernel_op_169*tmp_kernel_op_32)*(tmp_kernel_op_168*tmp_kernel_op_29 + tmp_kernel_op_169*tmp_kernel_op_32)) + ((tmp_kernel_op_168*tmp_kernel_op_40 + tmp_kernel_op_169*tmp_kernel_op_31)*(tmp_kernel_op_168*tmp_kernel_op_40 + tmp_kernel_op_169*tmp_kernel_op_31))) + tmp_kernel_op_94*(((tmp_kernel_op_171*tmp_kernel_op_69 + tmp_kernel_op_172*tmp_kernel_op_72)*(tmp_kernel_op_171*tmp_kernel_op_69 + tmp_kernel_op_172*tmp_kernel_op_72)) + ((tmp_kernel_op_171*tmp_kernel_op_80 + tmp_kernel_op_172*tmp_kernel_op_71)*(tmp_kernel_op_171*tmp_kernel_op_80 + tmp_kernel_op_172*tmp_kernel_op_71)));
+             const real_t elMatDiag_2 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_186 + tmp_kernel_op_108*tmp_kernel_op_187)*(tmp_kernel_op_105*tmp_kernel_op_186 + tmp_kernel_op_108*tmp_kernel_op_187)) + ((tmp_kernel_op_107*tmp_kernel_op_187 + tmp_kernel_op_116*tmp_kernel_op_186)*(tmp_kernel_op_107*tmp_kernel_op_187 + tmp_kernel_op_116*tmp_kernel_op_186))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_189 + tmp_kernel_op_144*tmp_kernel_op_190)*(tmp_kernel_op_141*tmp_kernel_op_189 + tmp_kernel_op_144*tmp_kernel_op_190)) + ((tmp_kernel_op_143*tmp_kernel_op_190 + tmp_kernel_op_152*tmp_kernel_op_189)*(tmp_kernel_op_143*tmp_kernel_op_190 + tmp_kernel_op_152*tmp_kernel_op_189))) + tmp_kernel_op_58*(((tmp_kernel_op_180*tmp_kernel_op_29 + tmp_kernel_op_181*tmp_kernel_op_32)*(tmp_kernel_op_180*tmp_kernel_op_29 + tmp_kernel_op_181*tmp_kernel_op_32)) + ((tmp_kernel_op_180*tmp_kernel_op_40 + tmp_kernel_op_181*tmp_kernel_op_31)*(tmp_kernel_op_180*tmp_kernel_op_40 + tmp_kernel_op_181*tmp_kernel_op_31))) + tmp_kernel_op_94*(((tmp_kernel_op_183*tmp_kernel_op_69 + tmp_kernel_op_184*tmp_kernel_op_72)*(tmp_kernel_op_183*tmp_kernel_op_69 + tmp_kernel_op_184*tmp_kernel_op_72)) + ((tmp_kernel_op_183*tmp_kernel_op_80 + tmp_kernel_op_184*tmp_kernel_op_71)*(tmp_kernel_op_183*tmp_kernel_op_80 + tmp_kernel_op_184*tmp_kernel_op_71)));
+             const real_t elMatDiag_3 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_205 + tmp_kernel_op_108*tmp_kernel_op_208)*(tmp_kernel_op_105*tmp_kernel_op_205 + tmp_kernel_op_108*tmp_kernel_op_208)) + ((tmp_kernel_op_107*tmp_kernel_op_208 + tmp_kernel_op_116*tmp_kernel_op_205)*(tmp_kernel_op_107*tmp_kernel_op_208 + tmp_kernel_op_116*tmp_kernel_op_205))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_211 + tmp_kernel_op_144*tmp_kernel_op_214)*(tmp_kernel_op_141*tmp_kernel_op_211 + tmp_kernel_op_144*tmp_kernel_op_214)) + ((tmp_kernel_op_143*tmp_kernel_op_214 + tmp_kernel_op_152*tmp_kernel_op_211)*(tmp_kernel_op_143*tmp_kernel_op_214 + tmp_kernel_op_152*tmp_kernel_op_211))) + tmp_kernel_op_58*(((tmp_kernel_op_193*tmp_kernel_op_29 + tmp_kernel_op_196*tmp_kernel_op_32)*(tmp_kernel_op_193*tmp_kernel_op_29 + tmp_kernel_op_196*tmp_kernel_op_32)) + ((tmp_kernel_op_193*tmp_kernel_op_40 + tmp_kernel_op_196*tmp_kernel_op_31)*(tmp_kernel_op_193*tmp_kernel_op_40 + tmp_kernel_op_196*tmp_kernel_op_31))) + tmp_kernel_op_94*(((tmp_kernel_op_199*tmp_kernel_op_69 + tmp_kernel_op_202*tmp_kernel_op_72)*(tmp_kernel_op_199*tmp_kernel_op_69 + tmp_kernel_op_202*tmp_kernel_op_72)) + ((tmp_kernel_op_199*tmp_kernel_op_80 + tmp_kernel_op_202*tmp_kernel_op_71)*(tmp_kernel_op_199*tmp_kernel_op_80 + tmp_kernel_op_202*tmp_kernel_op_71)));
+             const real_t elMatDiag_4 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_222 + tmp_kernel_op_108*tmp_kernel_op_223)*(tmp_kernel_op_105*tmp_kernel_op_222 + tmp_kernel_op_108*tmp_kernel_op_223)) + ((tmp_kernel_op_107*tmp_kernel_op_223 + tmp_kernel_op_116*tmp_kernel_op_222)*(tmp_kernel_op_107*tmp_kernel_op_223 + tmp_kernel_op_116*tmp_kernel_op_222))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_225 + tmp_kernel_op_144*tmp_kernel_op_226)*(tmp_kernel_op_141*tmp_kernel_op_225 + tmp_kernel_op_144*tmp_kernel_op_226)) + ((tmp_kernel_op_143*tmp_kernel_op_226 + tmp_kernel_op_152*tmp_kernel_op_225)*(tmp_kernel_op_143*tmp_kernel_op_226 + tmp_kernel_op_152*tmp_kernel_op_225))) + tmp_kernel_op_58*(((tmp_kernel_op_216*tmp_kernel_op_29 + tmp_kernel_op_217*tmp_kernel_op_32)*(tmp_kernel_op_216*tmp_kernel_op_29 + tmp_kernel_op_217*tmp_kernel_op_32)) + ((tmp_kernel_op_216*tmp_kernel_op_40 + tmp_kernel_op_217*tmp_kernel_op_31)*(tmp_kernel_op_216*tmp_kernel_op_40 + tmp_kernel_op_217*tmp_kernel_op_31))) + tmp_kernel_op_94*(((tmp_kernel_op_219*tmp_kernel_op_69 + tmp_kernel_op_220*tmp_kernel_op_72)*(tmp_kernel_op_219*tmp_kernel_op_69 + tmp_kernel_op_220*tmp_kernel_op_72)) + ((tmp_kernel_op_219*tmp_kernel_op_80 + tmp_kernel_op_220*tmp_kernel_op_71)*(tmp_kernel_op_219*tmp_kernel_op_80 + tmp_kernel_op_220*tmp_kernel_op_71)));
+             const real_t elMatDiag_5 = tmp_kernel_op_130*(((tmp_kernel_op_105*tmp_kernel_op_234 + tmp_kernel_op_108*tmp_kernel_op_235)*(tmp_kernel_op_105*tmp_kernel_op_234 + tmp_kernel_op_108*tmp_kernel_op_235)) + ((tmp_kernel_op_107*tmp_kernel_op_235 + tmp_kernel_op_116*tmp_kernel_op_234)*(tmp_kernel_op_107*tmp_kernel_op_235 + tmp_kernel_op_116*tmp_kernel_op_234))) + tmp_kernel_op_166*(((tmp_kernel_op_141*tmp_kernel_op_237 + tmp_kernel_op_144*tmp_kernel_op_238)*(tmp_kernel_op_141*tmp_kernel_op_237 + tmp_kernel_op_144*tmp_kernel_op_238)) + ((tmp_kernel_op_143*tmp_kernel_op_238 + tmp_kernel_op_152*tmp_kernel_op_237)*(tmp_kernel_op_143*tmp_kernel_op_238 + tmp_kernel_op_152*tmp_kernel_op_237))) + tmp_kernel_op_58*(((tmp_kernel_op_228*tmp_kernel_op_29 + tmp_kernel_op_229*tmp_kernel_op_32)*(tmp_kernel_op_228*tmp_kernel_op_29 + tmp_kernel_op_229*tmp_kernel_op_32)) + ((tmp_kernel_op_228*tmp_kernel_op_40 + tmp_kernel_op_229*tmp_kernel_op_31)*(tmp_kernel_op_228*tmp_kernel_op_40 + tmp_kernel_op_229*tmp_kernel_op_31))) + tmp_kernel_op_94*(((tmp_kernel_op_231*tmp_kernel_op_69 + tmp_kernel_op_232*tmp_kernel_op_72)*(tmp_kernel_op_231*tmp_kernel_op_69 + tmp_kernel_op_232*tmp_kernel_op_72)) + ((tmp_kernel_op_231*tmp_kernel_op_80 + tmp_kernel_op_232*tmp_kernel_op_71)*(tmp_kernel_op_231*tmp_kernel_op_80 + tmp_kernel_op_232*tmp_kernel_op_71)));
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_macro_2D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..136ae0bf2be81369823849eb910add1f0f986983
--- /dev/null
+++ b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_macro_2D.cpp
@@ -0,0 +1,925 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseDiffusionAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseDiffusionAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_kernel_op_0 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_kernel_op_1 = -tmp_kernel_op_0;
+       const real_t tmp_kernel_op_15 = rayVertex_0 - refVertex_0;
+       const real_t tmp_kernel_op_16 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_kernel_op_17 = -tmp_kernel_op_16;
+       const real_t tmp_kernel_op_18 = rayVertex_1 - refVertex_1;
+       const real_t tmp_kernel_op_19 = radRayVertex - radRefVertex;
+       const real_t tmp_kernel_op_20 = -tmp_kernel_op_19*1.0 / (-tmp_kernel_op_1*tmp_kernel_op_15 + tmp_kernel_op_17*tmp_kernel_op_18);
+       const real_t tmp_kernel_op_21 = tmp_kernel_op_20*1.0;
+       const real_t tmp_kernel_op_25 = -rayVertex_1;
+       const real_t tmp_kernel_op_26 = -rayVertex_0;
+       const real_t tmp_kernel_op_35 = 1.3333333333333333;
+       const real_t tmp_kernel_op_36 = 1.3333333333333333;
+       const real_t tmp_kernel_op_37 = tmp_kernel_op_35 + tmp_kernel_op_36 - 3.0;
+       const real_t tmp_kernel_op_50 = tmp_kernel_op_19*1.0 / (tmp_kernel_op_0*tmp_kernel_op_15 - tmp_kernel_op_16*tmp_kernel_op_18);
+       const real_t tmp_kernel_op_51 = tmp_kernel_op_50*1.0;
+       const real_t tmp_kernel_op_77 = 0.80000000000000004;
+       const real_t tmp_kernel_op_78 = 2.3999999999999999;
+       const real_t tmp_kernel_op_79 = tmp_kernel_op_77 + tmp_kernel_op_78 - 3.0;
+       const real_t tmp_kernel_op_115 = 2.3999999999999999;
+       const real_t tmp_kernel_op_116 = 0.80000000000000004;
+       const real_t tmp_kernel_op_117 = tmp_kernel_op_115 + tmp_kernel_op_116 - 3.0;
+       const real_t tmp_kernel_op_153 = 0.80000000000000004;
+       const real_t tmp_kernel_op_154 = 0.80000000000000004;
+       const real_t tmp_kernel_op_155 = tmp_kernel_op_153 + tmp_kernel_op_154 - 3.0;
+       const real_t tmp_kernel_op_217 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_36;
+       const real_t tmp_kernel_op_218 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_35;
+       const real_t tmp_kernel_op_220 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_36;
+       const real_t tmp_kernel_op_221 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_35;
+       const real_t tmp_kernel_op_225 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_78;
+       const real_t tmp_kernel_op_226 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_77;
+       const real_t tmp_kernel_op_228 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_78;
+       const real_t tmp_kernel_op_229 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_77;
+       const real_t tmp_kernel_op_233 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_116;
+       const real_t tmp_kernel_op_234 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_115;
+       const real_t tmp_kernel_op_236 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_116;
+       const real_t tmp_kernel_op_237 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_115;
+       const real_t tmp_kernel_op_241 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_154;
+       const real_t tmp_kernel_op_242 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_153;
+       const real_t tmp_kernel_op_244 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_154;
+       const real_t tmp_kernel_op_245 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_153;
+       const real_t tmp_kernel_op_250 = -tmp_kernel_op_35 + 1.3333333333333335;
+       const real_t tmp_kernel_op_255 = -tmp_kernel_op_77 - 0.79999999999999982;
+       const real_t tmp_kernel_op_260 = -tmp_kernel_op_115 + 2.3999999999999999;
+       const real_t tmp_kernel_op_265 = -tmp_kernel_op_153 + 2.3999999999999999;
+       const real_t tmp_kernel_op_271 = -tmp_kernel_op_36 + 1.3333333333333335;
+       const real_t tmp_kernel_op_276 = -tmp_kernel_op_78 + 2.3999999999999999;
+       const real_t tmp_kernel_op_281 = -tmp_kernel_op_116 - 0.79999999999999982;
+       const real_t tmp_kernel_op_286 = -tmp_kernel_op_154 + 2.3999999999999999;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t tmp_kernel_op_2 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_kernel_op_3 = -tmp_kernel_op_2;
+             const real_t tmp_kernel_op_4 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_kernel_op_5 = -tmp_kernel_op_4;
+             const real_t tmp_kernel_op_6 = p_affine_0_0 + tmp_kernel_op_3*0.33333333333333331 + tmp_kernel_op_5*0.33333333333333331;
+             const real_t tmp_kernel_op_7 = (tmp_kernel_op_6*tmp_kernel_op_6);
+             const real_t tmp_kernel_op_8 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_kernel_op_9 = -tmp_kernel_op_8;
+             const real_t tmp_kernel_op_10 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_kernel_op_11 = -tmp_kernel_op_10;
+             const real_t tmp_kernel_op_12 = p_affine_0_1 + tmp_kernel_op_11*0.33333333333333331 + tmp_kernel_op_9*0.33333333333333331;
+             const real_t tmp_kernel_op_13 = (tmp_kernel_op_12*tmp_kernel_op_12);
+             const real_t tmp_kernel_op_14 = tmp_kernel_op_13 + tmp_kernel_op_7;
+             const real_t tmp_kernel_op_22 = pow(tmp_kernel_op_14, -0.50000000000000000)*tmp_kernel_op_21;
+             const real_t tmp_kernel_op_23 = tmp_kernel_op_22*tmp_kernel_op_6;
+             const real_t tmp_kernel_op_24 = pow(tmp_kernel_op_14, -1.5000000000000000);
+             const real_t tmp_kernel_op_27 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_6) - tmp_kernel_op_17*(tmp_kernel_op_12 + tmp_kernel_op_25));
+             const real_t tmp_kernel_op_28 = tmp_kernel_op_24*tmp_kernel_op_27*1.0;
+             const real_t tmp_kernel_op_29 = tmp_kernel_op_1*tmp_kernel_op_23 + tmp_kernel_op_13*tmp_kernel_op_28;
+             const real_t tmp_kernel_op_30 = tmp_kernel_op_12*tmp_kernel_op_22;
+             const real_t tmp_kernel_op_31 = -tmp_kernel_op_17*tmp_kernel_op_30 + tmp_kernel_op_24*tmp_kernel_op_27*tmp_kernel_op_7*1.0;
+             const real_t tmp_kernel_op_32 = tmp_kernel_op_12*tmp_kernel_op_24*tmp_kernel_op_27*tmp_kernel_op_6*1.0 + tmp_kernel_op_17*tmp_kernel_op_23;
+             const real_t tmp_kernel_op_33 = tmp_kernel_op_1*tmp_kernel_op_30 - tmp_kernel_op_12*tmp_kernel_op_28*tmp_kernel_op_6;
+             const real_t tmp_kernel_op_34 = 1.0 / (tmp_kernel_op_29*tmp_kernel_op_31 + tmp_kernel_op_32*tmp_kernel_op_33);
+             const real_t tmp_kernel_op_38 = tmp_kernel_op_34*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_37 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_37);
+             const real_t tmp_kernel_op_39 = tmp_kernel_op_34*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_37 + jac_affine_inv_1_0_GRAY*tmp_kernel_op_37);
+             const real_t tmp_kernel_op_40 = tmp_kernel_op_29*tmp_kernel_op_38 + tmp_kernel_op_32*tmp_kernel_op_39;
+             const real_t tmp_kernel_op_41 = -tmp_kernel_op_33;
+             const real_t tmp_kernel_op_42 = tmp_kernel_op_31*tmp_kernel_op_39 + tmp_kernel_op_38*tmp_kernel_op_41;
+             const real_t tmp_kernel_op_43 = -p_affine_0_0;
+             const real_t tmp_kernel_op_44 = tmp_kernel_op_2*0.33333333333333331 + tmp_kernel_op_4*0.33333333333333331 + tmp_kernel_op_43;
+             const real_t tmp_kernel_op_45 = (tmp_kernel_op_44*tmp_kernel_op_44);
+             const real_t tmp_kernel_op_46 = -p_affine_0_1;
+             const real_t tmp_kernel_op_47 = tmp_kernel_op_10*0.33333333333333331 + tmp_kernel_op_46 + tmp_kernel_op_8*0.33333333333333331;
+             const real_t tmp_kernel_op_48 = (tmp_kernel_op_47*tmp_kernel_op_47);
+             const real_t tmp_kernel_op_49 = tmp_kernel_op_45 + tmp_kernel_op_48;
+             const real_t tmp_kernel_op_52 = pow(tmp_kernel_op_49, -0.50000000000000000)*tmp_kernel_op_51;
+             const real_t tmp_kernel_op_53 = tmp_kernel_op_44*tmp_kernel_op_52;
+             const real_t tmp_kernel_op_54 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_44) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_47);
+             const real_t tmp_kernel_op_55 = pow(tmp_kernel_op_49, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_56 = tmp_kernel_op_55*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_54);
+             const real_t tmp_kernel_op_57 = tmp_kernel_op_47*tmp_kernel_op_52;
+             const real_t tmp_kernel_op_58 = tmp_kernel_op_55*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_54);
+             const real_t tmp_kernel_op_59 = tmp_kernel_op_44*tmp_kernel_op_47;
+             const real_t tmp_kernel_op_60 = abs_det_jac_affine_GRAY*-0.28125*abs((tmp_kernel_op_0*tmp_kernel_op_53 - tmp_kernel_op_48*tmp_kernel_op_56)*(tmp_kernel_op_16*tmp_kernel_op_57 + tmp_kernel_op_45*tmp_kernel_op_58) - (tmp_kernel_op_0*tmp_kernel_op_57 + tmp_kernel_op_56*tmp_kernel_op_59)*(tmp_kernel_op_16*tmp_kernel_op_53 - tmp_kernel_op_58*tmp_kernel_op_59));
+             const real_t tmp_kernel_op_61 = p_affine_0_0 + tmp_kernel_op_3*0.20000000000000001 + tmp_kernel_op_5*0.59999999999999998;
+             const real_t tmp_kernel_op_62 = (tmp_kernel_op_61*tmp_kernel_op_61);
+             const real_t tmp_kernel_op_63 = p_affine_0_1 + tmp_kernel_op_11*0.59999999999999998 + tmp_kernel_op_9*0.20000000000000001;
+             const real_t tmp_kernel_op_64 = (tmp_kernel_op_63*tmp_kernel_op_63);
+             const real_t tmp_kernel_op_65 = tmp_kernel_op_62 + tmp_kernel_op_64;
+             const real_t tmp_kernel_op_66 = tmp_kernel_op_21*pow(tmp_kernel_op_65, -0.50000000000000000);
+             const real_t tmp_kernel_op_67 = tmp_kernel_op_61*tmp_kernel_op_66;
+             const real_t tmp_kernel_op_68 = pow(tmp_kernel_op_65, -1.5000000000000000);
+             const real_t tmp_kernel_op_69 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_61) - tmp_kernel_op_17*(tmp_kernel_op_25 + tmp_kernel_op_63));
+             const real_t tmp_kernel_op_70 = tmp_kernel_op_68*tmp_kernel_op_69*1.0;
+             const real_t tmp_kernel_op_71 = tmp_kernel_op_1*tmp_kernel_op_67 + tmp_kernel_op_64*tmp_kernel_op_70;
+             const real_t tmp_kernel_op_72 = tmp_kernel_op_63*tmp_kernel_op_66;
+             const real_t tmp_kernel_op_73 = -tmp_kernel_op_17*tmp_kernel_op_72 + tmp_kernel_op_62*tmp_kernel_op_68*tmp_kernel_op_69*1.0;
+             const real_t tmp_kernel_op_74 = tmp_kernel_op_17*tmp_kernel_op_67 + tmp_kernel_op_61*tmp_kernel_op_63*tmp_kernel_op_68*tmp_kernel_op_69*1.0;
+             const real_t tmp_kernel_op_75 = tmp_kernel_op_1*tmp_kernel_op_72 - tmp_kernel_op_61*tmp_kernel_op_63*tmp_kernel_op_70;
+             const real_t tmp_kernel_op_76 = 1.0 / (tmp_kernel_op_71*tmp_kernel_op_73 + tmp_kernel_op_74*tmp_kernel_op_75);
+             const real_t tmp_kernel_op_80 = tmp_kernel_op_76*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_79 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_79);
+             const real_t tmp_kernel_op_81 = tmp_kernel_op_76*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_79 + jac_affine_inv_1_0_GRAY*tmp_kernel_op_79);
+             const real_t tmp_kernel_op_82 = tmp_kernel_op_71*tmp_kernel_op_80 + tmp_kernel_op_74*tmp_kernel_op_81;
+             const real_t tmp_kernel_op_83 = -tmp_kernel_op_75;
+             const real_t tmp_kernel_op_84 = tmp_kernel_op_73*tmp_kernel_op_81 + tmp_kernel_op_80*tmp_kernel_op_83;
+             const real_t tmp_kernel_op_85 = tmp_kernel_op_2*0.20000000000000001 + tmp_kernel_op_4*0.59999999999999998 + tmp_kernel_op_43;
+             const real_t tmp_kernel_op_86 = (tmp_kernel_op_85*tmp_kernel_op_85);
+             const real_t tmp_kernel_op_87 = tmp_kernel_op_10*0.59999999999999998 + tmp_kernel_op_46 + tmp_kernel_op_8*0.20000000000000001;
+             const real_t tmp_kernel_op_88 = (tmp_kernel_op_87*tmp_kernel_op_87);
+             const real_t tmp_kernel_op_89 = tmp_kernel_op_86 + tmp_kernel_op_88;
+             const real_t tmp_kernel_op_90 = tmp_kernel_op_51*pow(tmp_kernel_op_89, -0.50000000000000000);
+             const real_t tmp_kernel_op_91 = tmp_kernel_op_85*tmp_kernel_op_90;
+             const real_t tmp_kernel_op_92 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_85) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_87);
+             const real_t tmp_kernel_op_93 = pow(tmp_kernel_op_89, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_94 = tmp_kernel_op_93*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_92);
+             const real_t tmp_kernel_op_95 = tmp_kernel_op_87*tmp_kernel_op_90;
+             const real_t tmp_kernel_op_96 = tmp_kernel_op_93*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_92);
+             const real_t tmp_kernel_op_97 = tmp_kernel_op_85*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_98 = abs_det_jac_affine_GRAY*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_91 - tmp_kernel_op_88*tmp_kernel_op_94)*(tmp_kernel_op_16*tmp_kernel_op_95 + tmp_kernel_op_86*tmp_kernel_op_96) - (tmp_kernel_op_0*tmp_kernel_op_95 + tmp_kernel_op_94*tmp_kernel_op_97)*(tmp_kernel_op_16*tmp_kernel_op_91 - tmp_kernel_op_96*tmp_kernel_op_97));
+             const real_t tmp_kernel_op_99 = p_affine_0_0 + tmp_kernel_op_3*0.59999999999999998 + tmp_kernel_op_5*0.20000000000000001;
+             const real_t tmp_kernel_op_100 = (tmp_kernel_op_99*tmp_kernel_op_99);
+             const real_t tmp_kernel_op_101 = p_affine_0_1 + tmp_kernel_op_11*0.20000000000000001 + tmp_kernel_op_9*0.59999999999999998;
+             const real_t tmp_kernel_op_102 = (tmp_kernel_op_101*tmp_kernel_op_101);
+             const real_t tmp_kernel_op_103 = tmp_kernel_op_100 + tmp_kernel_op_102;
+             const real_t tmp_kernel_op_104 = pow(tmp_kernel_op_103, -0.50000000000000000)*tmp_kernel_op_21;
+             const real_t tmp_kernel_op_105 = tmp_kernel_op_104*tmp_kernel_op_99;
+             const real_t tmp_kernel_op_106 = pow(tmp_kernel_op_103, -1.5000000000000000);
+             const real_t tmp_kernel_op_107 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_99) - tmp_kernel_op_17*(tmp_kernel_op_101 + tmp_kernel_op_25));
+             const real_t tmp_kernel_op_108 = tmp_kernel_op_106*tmp_kernel_op_107*1.0;
+             const real_t tmp_kernel_op_109 = tmp_kernel_op_1*tmp_kernel_op_105 + tmp_kernel_op_102*tmp_kernel_op_108;
+             const real_t tmp_kernel_op_110 = tmp_kernel_op_101*tmp_kernel_op_104;
+             const real_t tmp_kernel_op_111 = tmp_kernel_op_100*tmp_kernel_op_106*tmp_kernel_op_107*1.0 - tmp_kernel_op_110*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_112 = tmp_kernel_op_101*tmp_kernel_op_106*tmp_kernel_op_107*tmp_kernel_op_99*1.0 + tmp_kernel_op_105*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_113 = tmp_kernel_op_1*tmp_kernel_op_110 - tmp_kernel_op_101*tmp_kernel_op_108*tmp_kernel_op_99;
+             const real_t tmp_kernel_op_114 = 1.0 / (tmp_kernel_op_109*tmp_kernel_op_111 + tmp_kernel_op_112*tmp_kernel_op_113);
+             const real_t tmp_kernel_op_118 = tmp_kernel_op_114*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_117 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_117);
+             const real_t tmp_kernel_op_119 = tmp_kernel_op_114*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_117 + jac_affine_inv_1_0_GRAY*tmp_kernel_op_117);
+             const real_t tmp_kernel_op_120 = tmp_kernel_op_109*tmp_kernel_op_118 + tmp_kernel_op_112*tmp_kernel_op_119;
+             const real_t tmp_kernel_op_121 = -tmp_kernel_op_113;
+             const real_t tmp_kernel_op_122 = tmp_kernel_op_111*tmp_kernel_op_119 + tmp_kernel_op_118*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_123 = tmp_kernel_op_2*0.59999999999999998 + tmp_kernel_op_4*0.20000000000000001 + tmp_kernel_op_43;
+             const real_t tmp_kernel_op_124 = (tmp_kernel_op_123*tmp_kernel_op_123);
+             const real_t tmp_kernel_op_125 = tmp_kernel_op_10*0.20000000000000001 + tmp_kernel_op_46 + tmp_kernel_op_8*0.59999999999999998;
+             const real_t tmp_kernel_op_126 = (tmp_kernel_op_125*tmp_kernel_op_125);
+             const real_t tmp_kernel_op_127 = tmp_kernel_op_124 + tmp_kernel_op_126;
+             const real_t tmp_kernel_op_128 = pow(tmp_kernel_op_127, -0.50000000000000000)*tmp_kernel_op_51;
+             const real_t tmp_kernel_op_129 = tmp_kernel_op_123*tmp_kernel_op_128;
+             const real_t tmp_kernel_op_130 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_123) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_125);
+             const real_t tmp_kernel_op_131 = pow(tmp_kernel_op_127, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_132 = tmp_kernel_op_131*(radRayVertex + tmp_kernel_op_130*tmp_kernel_op_50);
+             const real_t tmp_kernel_op_133 = tmp_kernel_op_125*tmp_kernel_op_128;
+             const real_t tmp_kernel_op_134 = tmp_kernel_op_131*(radRayVertex + tmp_kernel_op_130*tmp_kernel_op_50);
+             const real_t tmp_kernel_op_135 = tmp_kernel_op_123*tmp_kernel_op_125;
+             const real_t tmp_kernel_op_136 = abs_det_jac_affine_GRAY*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_129 - tmp_kernel_op_126*tmp_kernel_op_132)*(tmp_kernel_op_124*tmp_kernel_op_134 + tmp_kernel_op_133*tmp_kernel_op_16) - (tmp_kernel_op_0*tmp_kernel_op_133 + tmp_kernel_op_132*tmp_kernel_op_135)*(tmp_kernel_op_129*tmp_kernel_op_16 - tmp_kernel_op_134*tmp_kernel_op_135));
+             const real_t tmp_kernel_op_137 = p_affine_0_0 + tmp_kernel_op_3*0.20000000000000001 + tmp_kernel_op_5*0.20000000000000001;
+             const real_t tmp_kernel_op_138 = (tmp_kernel_op_137*tmp_kernel_op_137);
+             const real_t tmp_kernel_op_139 = p_affine_0_1 + tmp_kernel_op_11*0.20000000000000001 + tmp_kernel_op_9*0.20000000000000001;
+             const real_t tmp_kernel_op_140 = (tmp_kernel_op_139*tmp_kernel_op_139);
+             const real_t tmp_kernel_op_141 = tmp_kernel_op_138 + tmp_kernel_op_140;
+             const real_t tmp_kernel_op_142 = pow(tmp_kernel_op_141, -0.50000000000000000)*tmp_kernel_op_21;
+             const real_t tmp_kernel_op_143 = tmp_kernel_op_137*tmp_kernel_op_142;
+             const real_t tmp_kernel_op_144 = pow(tmp_kernel_op_141, -1.5000000000000000);
+             const real_t tmp_kernel_op_145 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_137 + tmp_kernel_op_26) - tmp_kernel_op_17*(tmp_kernel_op_139 + tmp_kernel_op_25));
+             const real_t tmp_kernel_op_146 = tmp_kernel_op_144*tmp_kernel_op_145*1.0;
+             const real_t tmp_kernel_op_147 = tmp_kernel_op_1*tmp_kernel_op_143 + tmp_kernel_op_140*tmp_kernel_op_146;
+             const real_t tmp_kernel_op_148 = tmp_kernel_op_139*tmp_kernel_op_142;
+             const real_t tmp_kernel_op_149 = tmp_kernel_op_138*tmp_kernel_op_144*tmp_kernel_op_145*1.0 - tmp_kernel_op_148*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_150 = tmp_kernel_op_137*tmp_kernel_op_139*tmp_kernel_op_144*tmp_kernel_op_145*1.0 + tmp_kernel_op_143*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_151 = tmp_kernel_op_1*tmp_kernel_op_148 - tmp_kernel_op_137*tmp_kernel_op_139*tmp_kernel_op_146;
+             const real_t tmp_kernel_op_152 = 1.0 / (tmp_kernel_op_147*tmp_kernel_op_149 + tmp_kernel_op_150*tmp_kernel_op_151);
+             const real_t tmp_kernel_op_156 = tmp_kernel_op_152*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_155 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_155);
+             const real_t tmp_kernel_op_157 = tmp_kernel_op_152*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_155 + jac_affine_inv_1_0_GRAY*tmp_kernel_op_155);
+             const real_t tmp_kernel_op_158 = tmp_kernel_op_147*tmp_kernel_op_156 + tmp_kernel_op_150*tmp_kernel_op_157;
+             const real_t tmp_kernel_op_159 = -tmp_kernel_op_151;
+             const real_t tmp_kernel_op_160 = tmp_kernel_op_149*tmp_kernel_op_157 + tmp_kernel_op_156*tmp_kernel_op_159;
+             const real_t tmp_kernel_op_161 = tmp_kernel_op_2*0.20000000000000001 + tmp_kernel_op_4*0.20000000000000001 + tmp_kernel_op_43;
+             const real_t tmp_kernel_op_162 = (tmp_kernel_op_161*tmp_kernel_op_161);
+             const real_t tmp_kernel_op_163 = tmp_kernel_op_10*0.20000000000000001 + tmp_kernel_op_46 + tmp_kernel_op_8*0.20000000000000001;
+             const real_t tmp_kernel_op_164 = (tmp_kernel_op_163*tmp_kernel_op_163);
+             const real_t tmp_kernel_op_165 = tmp_kernel_op_162 + tmp_kernel_op_164;
+             const real_t tmp_kernel_op_166 = pow(tmp_kernel_op_165, -0.50000000000000000)*tmp_kernel_op_51;
+             const real_t tmp_kernel_op_167 = tmp_kernel_op_161*tmp_kernel_op_166;
+             const real_t tmp_kernel_op_168 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_161) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_163);
+             const real_t tmp_kernel_op_169 = pow(tmp_kernel_op_165, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_170 = tmp_kernel_op_169*(radRayVertex + tmp_kernel_op_168*tmp_kernel_op_50);
+             const real_t tmp_kernel_op_171 = tmp_kernel_op_163*tmp_kernel_op_166;
+             const real_t tmp_kernel_op_172 = tmp_kernel_op_169*(radRayVertex + tmp_kernel_op_168*tmp_kernel_op_50);
+             const real_t tmp_kernel_op_173 = tmp_kernel_op_161*tmp_kernel_op_163;
+             const real_t tmp_kernel_op_174 = abs_det_jac_affine_GRAY*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_167 - tmp_kernel_op_164*tmp_kernel_op_170)*(tmp_kernel_op_16*tmp_kernel_op_171 + tmp_kernel_op_162*tmp_kernel_op_172) - (tmp_kernel_op_0*tmp_kernel_op_171 + tmp_kernel_op_170*tmp_kernel_op_173)*(tmp_kernel_op_16*tmp_kernel_op_167 - tmp_kernel_op_172*tmp_kernel_op_173));
+             const real_t tmp_kernel_op_175 = tmp_kernel_op_34*(tmp_kernel_op_35 - 1.0);
+             const real_t tmp_kernel_op_176 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_175;
+             const real_t tmp_kernel_op_177 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_175;
+             const real_t tmp_kernel_op_178 = tmp_kernel_op_176*tmp_kernel_op_29 + tmp_kernel_op_177*tmp_kernel_op_32;
+             const real_t tmp_kernel_op_179 = tmp_kernel_op_176*tmp_kernel_op_41 + tmp_kernel_op_177*tmp_kernel_op_31;
+             const real_t tmp_kernel_op_180 = tmp_kernel_op_76*(tmp_kernel_op_77 - 1.0);
+             const real_t tmp_kernel_op_181 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_180;
+             const real_t tmp_kernel_op_182 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_180;
+             const real_t tmp_kernel_op_183 = tmp_kernel_op_181*tmp_kernel_op_71 + tmp_kernel_op_182*tmp_kernel_op_74;
+             const real_t tmp_kernel_op_184 = tmp_kernel_op_181*tmp_kernel_op_83 + tmp_kernel_op_182*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_185 = tmp_kernel_op_114*(tmp_kernel_op_115 - 1.0);
+             const real_t tmp_kernel_op_186 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_185;
+             const real_t tmp_kernel_op_187 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_185;
+             const real_t tmp_kernel_op_188 = tmp_kernel_op_109*tmp_kernel_op_186 + tmp_kernel_op_112*tmp_kernel_op_187;
+             const real_t tmp_kernel_op_189 = tmp_kernel_op_111*tmp_kernel_op_187 + tmp_kernel_op_121*tmp_kernel_op_186;
+             const real_t tmp_kernel_op_190 = tmp_kernel_op_152*(tmp_kernel_op_153 - 1.0);
+             const real_t tmp_kernel_op_191 = jac_affine_inv_0_1_GRAY*tmp_kernel_op_190;
+             const real_t tmp_kernel_op_192 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_190;
+             const real_t tmp_kernel_op_193 = tmp_kernel_op_147*tmp_kernel_op_191 + tmp_kernel_op_150*tmp_kernel_op_192;
+             const real_t tmp_kernel_op_194 = tmp_kernel_op_149*tmp_kernel_op_192 + tmp_kernel_op_159*tmp_kernel_op_191;
+             const real_t tmp_kernel_op_195 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_188 + tmp_kernel_op_122*tmp_kernel_op_189) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_193 + tmp_kernel_op_160*tmp_kernel_op_194) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_40 + tmp_kernel_op_179*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_82 + tmp_kernel_op_184*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_196 = tmp_kernel_op_34*(tmp_kernel_op_36 - 1.0);
+             const real_t tmp_kernel_op_197 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_196;
+             const real_t tmp_kernel_op_198 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_196;
+             const real_t tmp_kernel_op_199 = tmp_kernel_op_197*tmp_kernel_op_29 + tmp_kernel_op_198*tmp_kernel_op_32;
+             const real_t tmp_kernel_op_200 = tmp_kernel_op_197*tmp_kernel_op_41 + tmp_kernel_op_198*tmp_kernel_op_31;
+             const real_t tmp_kernel_op_201 = tmp_kernel_op_76*(tmp_kernel_op_78 - 1.0);
+             const real_t tmp_kernel_op_202 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_201;
+             const real_t tmp_kernel_op_203 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_201;
+             const real_t tmp_kernel_op_204 = tmp_kernel_op_202*tmp_kernel_op_71 + tmp_kernel_op_203*tmp_kernel_op_74;
+             const real_t tmp_kernel_op_205 = tmp_kernel_op_202*tmp_kernel_op_83 + tmp_kernel_op_203*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_206 = tmp_kernel_op_114*(tmp_kernel_op_116 - 1.0);
+             const real_t tmp_kernel_op_207 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_206;
+             const real_t tmp_kernel_op_208 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_206;
+             const real_t tmp_kernel_op_209 = tmp_kernel_op_109*tmp_kernel_op_207 + tmp_kernel_op_112*tmp_kernel_op_208;
+             const real_t tmp_kernel_op_210 = tmp_kernel_op_111*tmp_kernel_op_208 + tmp_kernel_op_121*tmp_kernel_op_207;
+             const real_t tmp_kernel_op_211 = tmp_kernel_op_152*(tmp_kernel_op_154 - 1.0);
+             const real_t tmp_kernel_op_212 = jac_affine_inv_1_1_GRAY*tmp_kernel_op_211;
+             const real_t tmp_kernel_op_213 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_211;
+             const real_t tmp_kernel_op_214 = tmp_kernel_op_147*tmp_kernel_op_212 + tmp_kernel_op_150*tmp_kernel_op_213;
+             const real_t tmp_kernel_op_215 = tmp_kernel_op_149*tmp_kernel_op_213 + tmp_kernel_op_159*tmp_kernel_op_212;
+             const real_t tmp_kernel_op_216 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_209 + tmp_kernel_op_122*tmp_kernel_op_210) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_214 + tmp_kernel_op_160*tmp_kernel_op_215) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_40 + tmp_kernel_op_200*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_82 + tmp_kernel_op_205*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_219 = tmp_kernel_op_34*(tmp_kernel_op_217 + tmp_kernel_op_218);
+             const real_t tmp_kernel_op_222 = tmp_kernel_op_34*(tmp_kernel_op_220 + tmp_kernel_op_221);
+             const real_t tmp_kernel_op_223 = tmp_kernel_op_219*tmp_kernel_op_29 + tmp_kernel_op_222*tmp_kernel_op_32;
+             const real_t tmp_kernel_op_224 = tmp_kernel_op_219*tmp_kernel_op_41 + tmp_kernel_op_222*tmp_kernel_op_31;
+             const real_t tmp_kernel_op_227 = tmp_kernel_op_76*(tmp_kernel_op_225 + tmp_kernel_op_226);
+             const real_t tmp_kernel_op_230 = tmp_kernel_op_76*(tmp_kernel_op_228 + tmp_kernel_op_229);
+             const real_t tmp_kernel_op_231 = tmp_kernel_op_227*tmp_kernel_op_71 + tmp_kernel_op_230*tmp_kernel_op_74;
+             const real_t tmp_kernel_op_232 = tmp_kernel_op_227*tmp_kernel_op_83 + tmp_kernel_op_230*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_235 = tmp_kernel_op_114*(tmp_kernel_op_233 + tmp_kernel_op_234);
+             const real_t tmp_kernel_op_238 = tmp_kernel_op_114*(tmp_kernel_op_236 + tmp_kernel_op_237);
+             const real_t tmp_kernel_op_239 = tmp_kernel_op_109*tmp_kernel_op_235 + tmp_kernel_op_112*tmp_kernel_op_238;
+             const real_t tmp_kernel_op_240 = tmp_kernel_op_111*tmp_kernel_op_238 + tmp_kernel_op_121*tmp_kernel_op_235;
+             const real_t tmp_kernel_op_243 = tmp_kernel_op_152*(tmp_kernel_op_241 + tmp_kernel_op_242);
+             const real_t tmp_kernel_op_246 = tmp_kernel_op_152*(tmp_kernel_op_244 + tmp_kernel_op_245);
+             const real_t tmp_kernel_op_247 = tmp_kernel_op_147*tmp_kernel_op_243 + tmp_kernel_op_150*tmp_kernel_op_246;
+             const real_t tmp_kernel_op_248 = tmp_kernel_op_149*tmp_kernel_op_246 + tmp_kernel_op_159*tmp_kernel_op_243;
+             const real_t tmp_kernel_op_249 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_239 + tmp_kernel_op_122*tmp_kernel_op_240) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_247 + tmp_kernel_op_160*tmp_kernel_op_248) + tmp_kernel_op_60*(tmp_kernel_op_223*tmp_kernel_op_40 + tmp_kernel_op_224*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_231*tmp_kernel_op_82 + tmp_kernel_op_232*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_251 = tmp_kernel_op_34*(jac_affine_inv_1_1_GRAY*tmp_kernel_op_250 - tmp_kernel_op_217);
+             const real_t tmp_kernel_op_252 = tmp_kernel_op_34*(jac_affine_inv_1_0_GRAY*tmp_kernel_op_250 - tmp_kernel_op_220);
+             const real_t tmp_kernel_op_253 = tmp_kernel_op_251*tmp_kernel_op_29 + tmp_kernel_op_252*tmp_kernel_op_32;
+             const real_t tmp_kernel_op_254 = tmp_kernel_op_251*tmp_kernel_op_41 + tmp_kernel_op_252*tmp_kernel_op_31;
+             const real_t tmp_kernel_op_256 = tmp_kernel_op_76*(jac_affine_inv_1_1_GRAY*tmp_kernel_op_255 - tmp_kernel_op_225);
+             const real_t tmp_kernel_op_257 = tmp_kernel_op_76*(jac_affine_inv_1_0_GRAY*tmp_kernel_op_255 - tmp_kernel_op_228);
+             const real_t tmp_kernel_op_258 = tmp_kernel_op_256*tmp_kernel_op_71 + tmp_kernel_op_257*tmp_kernel_op_74;
+             const real_t tmp_kernel_op_259 = tmp_kernel_op_256*tmp_kernel_op_83 + tmp_kernel_op_257*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_261 = tmp_kernel_op_114*(jac_affine_inv_1_1_GRAY*tmp_kernel_op_260 - tmp_kernel_op_233);
+             const real_t tmp_kernel_op_262 = tmp_kernel_op_114*(jac_affine_inv_1_0_GRAY*tmp_kernel_op_260 - tmp_kernel_op_236);
+             const real_t tmp_kernel_op_263 = tmp_kernel_op_109*tmp_kernel_op_261 + tmp_kernel_op_112*tmp_kernel_op_262;
+             const real_t tmp_kernel_op_264 = tmp_kernel_op_111*tmp_kernel_op_262 + tmp_kernel_op_121*tmp_kernel_op_261;
+             const real_t tmp_kernel_op_266 = tmp_kernel_op_152*(jac_affine_inv_1_1_GRAY*tmp_kernel_op_265 - tmp_kernel_op_241);
+             const real_t tmp_kernel_op_267 = tmp_kernel_op_152*(jac_affine_inv_1_0_GRAY*tmp_kernel_op_265 - tmp_kernel_op_244);
+             const real_t tmp_kernel_op_268 = tmp_kernel_op_147*tmp_kernel_op_266 + tmp_kernel_op_150*tmp_kernel_op_267;
+             const real_t tmp_kernel_op_269 = tmp_kernel_op_149*tmp_kernel_op_267 + tmp_kernel_op_159*tmp_kernel_op_266;
+             const real_t tmp_kernel_op_270 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_263 + tmp_kernel_op_122*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_268 + tmp_kernel_op_160*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_253*tmp_kernel_op_40 + tmp_kernel_op_254*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_258*tmp_kernel_op_82 + tmp_kernel_op_259*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_272 = tmp_kernel_op_34*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_271 - tmp_kernel_op_218);
+             const real_t tmp_kernel_op_273 = tmp_kernel_op_34*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_271 - tmp_kernel_op_221);
+             const real_t tmp_kernel_op_274 = tmp_kernel_op_272*tmp_kernel_op_29 + tmp_kernel_op_273*tmp_kernel_op_32;
+             const real_t tmp_kernel_op_275 = tmp_kernel_op_272*tmp_kernel_op_41 + tmp_kernel_op_273*tmp_kernel_op_31;
+             const real_t tmp_kernel_op_277 = tmp_kernel_op_76*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_276 - tmp_kernel_op_226);
+             const real_t tmp_kernel_op_278 = tmp_kernel_op_76*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_276 - tmp_kernel_op_229);
+             const real_t tmp_kernel_op_279 = tmp_kernel_op_277*tmp_kernel_op_71 + tmp_kernel_op_278*tmp_kernel_op_74;
+             const real_t tmp_kernel_op_280 = tmp_kernel_op_277*tmp_kernel_op_83 + tmp_kernel_op_278*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_282 = tmp_kernel_op_114*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_281 - tmp_kernel_op_234);
+             const real_t tmp_kernel_op_283 = tmp_kernel_op_114*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_281 - tmp_kernel_op_237);
+             const real_t tmp_kernel_op_284 = tmp_kernel_op_109*tmp_kernel_op_282 + tmp_kernel_op_112*tmp_kernel_op_283;
+             const real_t tmp_kernel_op_285 = tmp_kernel_op_111*tmp_kernel_op_283 + tmp_kernel_op_121*tmp_kernel_op_282;
+             const real_t tmp_kernel_op_287 = tmp_kernel_op_152*(jac_affine_inv_0_1_GRAY*tmp_kernel_op_286 - tmp_kernel_op_242);
+             const real_t tmp_kernel_op_288 = tmp_kernel_op_152*(jac_affine_inv_0_0_GRAY*tmp_kernel_op_286 - tmp_kernel_op_245);
+             const real_t tmp_kernel_op_289 = tmp_kernel_op_147*tmp_kernel_op_287 + tmp_kernel_op_150*tmp_kernel_op_288;
+             const real_t tmp_kernel_op_290 = tmp_kernel_op_149*tmp_kernel_op_288 + tmp_kernel_op_159*tmp_kernel_op_287;
+             const real_t tmp_kernel_op_291 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_284 + tmp_kernel_op_122*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_289 + tmp_kernel_op_160*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_274*tmp_kernel_op_40 + tmp_kernel_op_275*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_279*tmp_kernel_op_82 + tmp_kernel_op_280*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_292 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_209 + tmp_kernel_op_189*tmp_kernel_op_210) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_214 + tmp_kernel_op_194*tmp_kernel_op_215) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_199 + tmp_kernel_op_179*tmp_kernel_op_200) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_204 + tmp_kernel_op_184*tmp_kernel_op_205);
+             const real_t tmp_kernel_op_293 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_239 + tmp_kernel_op_189*tmp_kernel_op_240) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_247 + tmp_kernel_op_194*tmp_kernel_op_248) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_223 + tmp_kernel_op_179*tmp_kernel_op_224) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_231 + tmp_kernel_op_184*tmp_kernel_op_232);
+             const real_t tmp_kernel_op_294 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_263 + tmp_kernel_op_189*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_268 + tmp_kernel_op_194*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_253 + tmp_kernel_op_179*tmp_kernel_op_254) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_258 + tmp_kernel_op_184*tmp_kernel_op_259);
+             const real_t tmp_kernel_op_295 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_284 + tmp_kernel_op_189*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_289 + tmp_kernel_op_194*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_274 + tmp_kernel_op_179*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_279 + tmp_kernel_op_184*tmp_kernel_op_280);
+             const real_t tmp_kernel_op_296 = tmp_kernel_op_136*(tmp_kernel_op_209*tmp_kernel_op_239 + tmp_kernel_op_210*tmp_kernel_op_240) + tmp_kernel_op_174*(tmp_kernel_op_214*tmp_kernel_op_247 + tmp_kernel_op_215*tmp_kernel_op_248) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_223 + tmp_kernel_op_200*tmp_kernel_op_224) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_231 + tmp_kernel_op_205*tmp_kernel_op_232);
+             const real_t tmp_kernel_op_297 = tmp_kernel_op_136*(tmp_kernel_op_209*tmp_kernel_op_263 + tmp_kernel_op_210*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_214*tmp_kernel_op_268 + tmp_kernel_op_215*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_253 + tmp_kernel_op_200*tmp_kernel_op_254) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_258 + tmp_kernel_op_205*tmp_kernel_op_259);
+             const real_t tmp_kernel_op_298 = tmp_kernel_op_136*(tmp_kernel_op_209*tmp_kernel_op_284 + tmp_kernel_op_210*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_214*tmp_kernel_op_289 + tmp_kernel_op_215*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_274 + tmp_kernel_op_200*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_279 + tmp_kernel_op_205*tmp_kernel_op_280);
+             const real_t tmp_kernel_op_299 = tmp_kernel_op_136*(tmp_kernel_op_239*tmp_kernel_op_263 + tmp_kernel_op_240*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_247*tmp_kernel_op_268 + tmp_kernel_op_248*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_223*tmp_kernel_op_253 + tmp_kernel_op_224*tmp_kernel_op_254) + tmp_kernel_op_98*(tmp_kernel_op_231*tmp_kernel_op_258 + tmp_kernel_op_232*tmp_kernel_op_259);
+             const real_t tmp_kernel_op_300 = tmp_kernel_op_136*(tmp_kernel_op_239*tmp_kernel_op_284 + tmp_kernel_op_240*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_247*tmp_kernel_op_289 + tmp_kernel_op_248*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_223*tmp_kernel_op_274 + tmp_kernel_op_224*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_231*tmp_kernel_op_279 + tmp_kernel_op_232*tmp_kernel_op_280);
+             const real_t tmp_kernel_op_301 = tmp_kernel_op_136*(tmp_kernel_op_263*tmp_kernel_op_284 + tmp_kernel_op_264*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_268*tmp_kernel_op_289 + tmp_kernel_op_269*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_253*tmp_kernel_op_274 + tmp_kernel_op_254*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_258*tmp_kernel_op_279 + tmp_kernel_op_259*tmp_kernel_op_280);
+             const real_t elMat_0_0 = tmp_kernel_op_136*((tmp_kernel_op_120*tmp_kernel_op_120) + (tmp_kernel_op_122*tmp_kernel_op_122)) + tmp_kernel_op_174*((tmp_kernel_op_158*tmp_kernel_op_158) + (tmp_kernel_op_160*tmp_kernel_op_160)) + tmp_kernel_op_60*((tmp_kernel_op_40*tmp_kernel_op_40) + (tmp_kernel_op_42*tmp_kernel_op_42)) + tmp_kernel_op_98*((tmp_kernel_op_82*tmp_kernel_op_82) + (tmp_kernel_op_84*tmp_kernel_op_84));
+             const real_t elMat_0_1 = tmp_kernel_op_195;
+             const real_t elMat_0_2 = tmp_kernel_op_216;
+             const real_t elMat_0_3 = tmp_kernel_op_249;
+             const real_t elMat_0_4 = tmp_kernel_op_270;
+             const real_t elMat_0_5 = tmp_kernel_op_291;
+             const real_t elMat_1_0 = tmp_kernel_op_195;
+             const real_t elMat_1_1 = tmp_kernel_op_136*((tmp_kernel_op_188*tmp_kernel_op_188) + (tmp_kernel_op_189*tmp_kernel_op_189)) + tmp_kernel_op_174*((tmp_kernel_op_193*tmp_kernel_op_193) + (tmp_kernel_op_194*tmp_kernel_op_194)) + tmp_kernel_op_60*((tmp_kernel_op_178*tmp_kernel_op_178) + (tmp_kernel_op_179*tmp_kernel_op_179)) + tmp_kernel_op_98*((tmp_kernel_op_183*tmp_kernel_op_183) + (tmp_kernel_op_184*tmp_kernel_op_184));
+             const real_t elMat_1_2 = tmp_kernel_op_292;
+             const real_t elMat_1_3 = tmp_kernel_op_293;
+             const real_t elMat_1_4 = tmp_kernel_op_294;
+             const real_t elMat_1_5 = tmp_kernel_op_295;
+             const real_t elMat_2_0 = tmp_kernel_op_216;
+             const real_t elMat_2_1 = tmp_kernel_op_292;
+             const real_t elMat_2_2 = tmp_kernel_op_136*((tmp_kernel_op_209*tmp_kernel_op_209) + (tmp_kernel_op_210*tmp_kernel_op_210)) + tmp_kernel_op_174*((tmp_kernel_op_214*tmp_kernel_op_214) + (tmp_kernel_op_215*tmp_kernel_op_215)) + tmp_kernel_op_60*((tmp_kernel_op_199*tmp_kernel_op_199) + (tmp_kernel_op_200*tmp_kernel_op_200)) + tmp_kernel_op_98*((tmp_kernel_op_204*tmp_kernel_op_204) + (tmp_kernel_op_205*tmp_kernel_op_205));
+             const real_t elMat_2_3 = tmp_kernel_op_296;
+             const real_t elMat_2_4 = tmp_kernel_op_297;
+             const real_t elMat_2_5 = tmp_kernel_op_298;
+             const real_t elMat_3_0 = tmp_kernel_op_249;
+             const real_t elMat_3_1 = tmp_kernel_op_293;
+             const real_t elMat_3_2 = tmp_kernel_op_296;
+             const real_t elMat_3_3 = tmp_kernel_op_136*((tmp_kernel_op_239*tmp_kernel_op_239) + (tmp_kernel_op_240*tmp_kernel_op_240)) + tmp_kernel_op_174*((tmp_kernel_op_247*tmp_kernel_op_247) + (tmp_kernel_op_248*tmp_kernel_op_248)) + tmp_kernel_op_60*((tmp_kernel_op_223*tmp_kernel_op_223) + (tmp_kernel_op_224*tmp_kernel_op_224)) + tmp_kernel_op_98*((tmp_kernel_op_231*tmp_kernel_op_231) + (tmp_kernel_op_232*tmp_kernel_op_232));
+             const real_t elMat_3_4 = tmp_kernel_op_299;
+             const real_t elMat_3_5 = tmp_kernel_op_300;
+             const real_t elMat_4_0 = tmp_kernel_op_270;
+             const real_t elMat_4_1 = tmp_kernel_op_294;
+             const real_t elMat_4_2 = tmp_kernel_op_297;
+             const real_t elMat_4_3 = tmp_kernel_op_299;
+             const real_t elMat_4_4 = tmp_kernel_op_136*((tmp_kernel_op_263*tmp_kernel_op_263) + (tmp_kernel_op_264*tmp_kernel_op_264)) + tmp_kernel_op_174*((tmp_kernel_op_268*tmp_kernel_op_268) + (tmp_kernel_op_269*tmp_kernel_op_269)) + tmp_kernel_op_60*((tmp_kernel_op_253*tmp_kernel_op_253) + (tmp_kernel_op_254*tmp_kernel_op_254)) + tmp_kernel_op_98*((tmp_kernel_op_258*tmp_kernel_op_258) + (tmp_kernel_op_259*tmp_kernel_op_259));
+             const real_t elMat_4_5 = tmp_kernel_op_301;
+             const real_t elMat_5_0 = tmp_kernel_op_291;
+             const real_t elMat_5_1 = tmp_kernel_op_295;
+             const real_t elMat_5_2 = tmp_kernel_op_298;
+             const real_t elMat_5_3 = tmp_kernel_op_300;
+             const real_t elMat_5_4 = tmp_kernel_op_301;
+             const real_t elMat_5_5 = tmp_kernel_op_136*((tmp_kernel_op_284*tmp_kernel_op_284) + (tmp_kernel_op_285*tmp_kernel_op_285)) + tmp_kernel_op_174*((tmp_kernel_op_289*tmp_kernel_op_289) + (tmp_kernel_op_290*tmp_kernel_op_290)) + tmp_kernel_op_60*((tmp_kernel_op_274*tmp_kernel_op_274) + (tmp_kernel_op_275*tmp_kernel_op_275)) + tmp_kernel_op_98*((tmp_kernel_op_279*tmp_kernel_op_279) + (tmp_kernel_op_280*tmp_kernel_op_280));
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       const real_t tmp_moved_constant_0 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_36;
+       const real_t tmp_moved_constant_1 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_35;
+       const real_t tmp_moved_constant_2 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_36;
+       const real_t tmp_moved_constant_3 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_35;
+       const real_t tmp_moved_constant_4 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_78;
+       const real_t tmp_moved_constant_5 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_77;
+       const real_t tmp_moved_constant_6 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_78;
+       const real_t tmp_moved_constant_7 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_77;
+       const real_t tmp_moved_constant_8 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_116;
+       const real_t tmp_moved_constant_9 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_115;
+       const real_t tmp_moved_constant_10 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_116;
+       const real_t tmp_moved_constant_11 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_115;
+       const real_t tmp_moved_constant_12 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_154;
+       const real_t tmp_moved_constant_13 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_153;
+       const real_t tmp_moved_constant_14 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_154;
+       const real_t tmp_moved_constant_15 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_153;
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t tmp_kernel_op_2 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_kernel_op_3 = -tmp_kernel_op_2;
+             const real_t tmp_kernel_op_4 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_kernel_op_5 = -tmp_kernel_op_4;
+             const real_t tmp_kernel_op_6 = p_affine_0_0 + tmp_kernel_op_3*0.33333333333333331 + tmp_kernel_op_5*0.33333333333333331;
+             const real_t tmp_kernel_op_7 = (tmp_kernel_op_6*tmp_kernel_op_6);
+             const real_t tmp_kernel_op_8 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_kernel_op_9 = -tmp_kernel_op_8;
+             const real_t tmp_kernel_op_10 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_kernel_op_11 = -tmp_kernel_op_10;
+             const real_t tmp_kernel_op_12 = p_affine_0_1 + tmp_kernel_op_11*0.33333333333333331 + tmp_kernel_op_9*0.33333333333333331;
+             const real_t tmp_kernel_op_13 = (tmp_kernel_op_12*tmp_kernel_op_12);
+             const real_t tmp_kernel_op_14 = tmp_kernel_op_13 + tmp_kernel_op_7;
+             const real_t tmp_kernel_op_22 = pow(tmp_kernel_op_14, -0.50000000000000000)*tmp_kernel_op_21;
+             const real_t tmp_kernel_op_23 = tmp_kernel_op_22*tmp_kernel_op_6;
+             const real_t tmp_kernel_op_24 = pow(tmp_kernel_op_14, -1.5000000000000000);
+             const real_t tmp_kernel_op_27 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_6) - tmp_kernel_op_17*(tmp_kernel_op_12 + tmp_kernel_op_25));
+             const real_t tmp_kernel_op_28 = tmp_kernel_op_24*tmp_kernel_op_27*1.0;
+             const real_t tmp_kernel_op_29 = tmp_kernel_op_1*tmp_kernel_op_23 + tmp_kernel_op_13*tmp_kernel_op_28;
+             const real_t tmp_kernel_op_30 = tmp_kernel_op_12*tmp_kernel_op_22;
+             const real_t tmp_kernel_op_31 = -tmp_kernel_op_17*tmp_kernel_op_30 + tmp_kernel_op_24*tmp_kernel_op_27*tmp_kernel_op_7*1.0;
+             const real_t tmp_kernel_op_32 = tmp_kernel_op_12*tmp_kernel_op_24*tmp_kernel_op_27*tmp_kernel_op_6*1.0 + tmp_kernel_op_17*tmp_kernel_op_23;
+             const real_t tmp_kernel_op_33 = tmp_kernel_op_1*tmp_kernel_op_30 - tmp_kernel_op_12*tmp_kernel_op_28*tmp_kernel_op_6;
+             const real_t tmp_kernel_op_34 = 1.0 / (tmp_kernel_op_29*tmp_kernel_op_31 + tmp_kernel_op_32*tmp_kernel_op_33);
+             const real_t tmp_kernel_op_38 = tmp_kernel_op_34*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_37 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_37);
+             const real_t tmp_kernel_op_39 = tmp_kernel_op_34*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_37 + jac_affine_inv_1_0_BLUE*tmp_kernel_op_37);
+             const real_t tmp_kernel_op_40 = tmp_kernel_op_29*tmp_kernel_op_38 + tmp_kernel_op_32*tmp_kernel_op_39;
+             const real_t tmp_kernel_op_41 = -tmp_kernel_op_33;
+             const real_t tmp_kernel_op_42 = tmp_kernel_op_31*tmp_kernel_op_39 + tmp_kernel_op_38*tmp_kernel_op_41;
+             const real_t tmp_kernel_op_43 = -p_affine_0_0;
+             const real_t tmp_kernel_op_44 = tmp_kernel_op_2*0.33333333333333331 + tmp_kernel_op_4*0.33333333333333331 + tmp_kernel_op_43;
+             const real_t tmp_kernel_op_45 = (tmp_kernel_op_44*tmp_kernel_op_44);
+             const real_t tmp_kernel_op_46 = -p_affine_0_1;
+             const real_t tmp_kernel_op_47 = tmp_kernel_op_10*0.33333333333333331 + tmp_kernel_op_46 + tmp_kernel_op_8*0.33333333333333331;
+             const real_t tmp_kernel_op_48 = (tmp_kernel_op_47*tmp_kernel_op_47);
+             const real_t tmp_kernel_op_49 = tmp_kernel_op_45 + tmp_kernel_op_48;
+             const real_t tmp_kernel_op_52 = pow(tmp_kernel_op_49, -0.50000000000000000)*tmp_kernel_op_51;
+             const real_t tmp_kernel_op_53 = tmp_kernel_op_44*tmp_kernel_op_52;
+             const real_t tmp_kernel_op_54 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_44) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_47);
+             const real_t tmp_kernel_op_55 = pow(tmp_kernel_op_49, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_56 = tmp_kernel_op_55*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_54);
+             const real_t tmp_kernel_op_57 = tmp_kernel_op_47*tmp_kernel_op_52;
+             const real_t tmp_kernel_op_58 = tmp_kernel_op_55*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_54);
+             const real_t tmp_kernel_op_59 = tmp_kernel_op_44*tmp_kernel_op_47;
+             const real_t tmp_kernel_op_60 = abs_det_jac_affine_BLUE*-0.28125*abs((tmp_kernel_op_0*tmp_kernel_op_53 - tmp_kernel_op_48*tmp_kernel_op_56)*(tmp_kernel_op_16*tmp_kernel_op_57 + tmp_kernel_op_45*tmp_kernel_op_58) - (tmp_kernel_op_0*tmp_kernel_op_57 + tmp_kernel_op_56*tmp_kernel_op_59)*(tmp_kernel_op_16*tmp_kernel_op_53 - tmp_kernel_op_58*tmp_kernel_op_59));
+             const real_t tmp_kernel_op_61 = p_affine_0_0 + tmp_kernel_op_3*0.20000000000000001 + tmp_kernel_op_5*0.59999999999999998;
+             const real_t tmp_kernel_op_62 = (tmp_kernel_op_61*tmp_kernel_op_61);
+             const real_t tmp_kernel_op_63 = p_affine_0_1 + tmp_kernel_op_11*0.59999999999999998 + tmp_kernel_op_9*0.20000000000000001;
+             const real_t tmp_kernel_op_64 = (tmp_kernel_op_63*tmp_kernel_op_63);
+             const real_t tmp_kernel_op_65 = tmp_kernel_op_62 + tmp_kernel_op_64;
+             const real_t tmp_kernel_op_66 = tmp_kernel_op_21*pow(tmp_kernel_op_65, -0.50000000000000000);
+             const real_t tmp_kernel_op_67 = tmp_kernel_op_61*tmp_kernel_op_66;
+             const real_t tmp_kernel_op_68 = pow(tmp_kernel_op_65, -1.5000000000000000);
+             const real_t tmp_kernel_op_69 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_61) - tmp_kernel_op_17*(tmp_kernel_op_25 + tmp_kernel_op_63));
+             const real_t tmp_kernel_op_70 = tmp_kernel_op_68*tmp_kernel_op_69*1.0;
+             const real_t tmp_kernel_op_71 = tmp_kernel_op_1*tmp_kernel_op_67 + tmp_kernel_op_64*tmp_kernel_op_70;
+             const real_t tmp_kernel_op_72 = tmp_kernel_op_63*tmp_kernel_op_66;
+             const real_t tmp_kernel_op_73 = -tmp_kernel_op_17*tmp_kernel_op_72 + tmp_kernel_op_62*tmp_kernel_op_68*tmp_kernel_op_69*1.0;
+             const real_t tmp_kernel_op_74 = tmp_kernel_op_17*tmp_kernel_op_67 + tmp_kernel_op_61*tmp_kernel_op_63*tmp_kernel_op_68*tmp_kernel_op_69*1.0;
+             const real_t tmp_kernel_op_75 = tmp_kernel_op_1*tmp_kernel_op_72 - tmp_kernel_op_61*tmp_kernel_op_63*tmp_kernel_op_70;
+             const real_t tmp_kernel_op_76 = 1.0 / (tmp_kernel_op_71*tmp_kernel_op_73 + tmp_kernel_op_74*tmp_kernel_op_75);
+             const real_t tmp_kernel_op_80 = tmp_kernel_op_76*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_79 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_79);
+             const real_t tmp_kernel_op_81 = tmp_kernel_op_76*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_79 + jac_affine_inv_1_0_BLUE*tmp_kernel_op_79);
+             const real_t tmp_kernel_op_82 = tmp_kernel_op_71*tmp_kernel_op_80 + tmp_kernel_op_74*tmp_kernel_op_81;
+             const real_t tmp_kernel_op_83 = -tmp_kernel_op_75;
+             const real_t tmp_kernel_op_84 = tmp_kernel_op_73*tmp_kernel_op_81 + tmp_kernel_op_80*tmp_kernel_op_83;
+             const real_t tmp_kernel_op_85 = tmp_kernel_op_2*0.20000000000000001 + tmp_kernel_op_4*0.59999999999999998 + tmp_kernel_op_43;
+             const real_t tmp_kernel_op_86 = (tmp_kernel_op_85*tmp_kernel_op_85);
+             const real_t tmp_kernel_op_87 = tmp_kernel_op_10*0.59999999999999998 + tmp_kernel_op_46 + tmp_kernel_op_8*0.20000000000000001;
+             const real_t tmp_kernel_op_88 = (tmp_kernel_op_87*tmp_kernel_op_87);
+             const real_t tmp_kernel_op_89 = tmp_kernel_op_86 + tmp_kernel_op_88;
+             const real_t tmp_kernel_op_90 = tmp_kernel_op_51*pow(tmp_kernel_op_89, -0.50000000000000000);
+             const real_t tmp_kernel_op_91 = tmp_kernel_op_85*tmp_kernel_op_90;
+             const real_t tmp_kernel_op_92 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_85) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_87);
+             const real_t tmp_kernel_op_93 = pow(tmp_kernel_op_89, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_94 = tmp_kernel_op_93*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_92);
+             const real_t tmp_kernel_op_95 = tmp_kernel_op_87*tmp_kernel_op_90;
+             const real_t tmp_kernel_op_96 = tmp_kernel_op_93*(radRayVertex + tmp_kernel_op_50*tmp_kernel_op_92);
+             const real_t tmp_kernel_op_97 = tmp_kernel_op_85*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_98 = abs_det_jac_affine_BLUE*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_91 - tmp_kernel_op_88*tmp_kernel_op_94)*(tmp_kernel_op_16*tmp_kernel_op_95 + tmp_kernel_op_86*tmp_kernel_op_96) - (tmp_kernel_op_0*tmp_kernel_op_95 + tmp_kernel_op_94*tmp_kernel_op_97)*(tmp_kernel_op_16*tmp_kernel_op_91 - tmp_kernel_op_96*tmp_kernel_op_97));
+             const real_t tmp_kernel_op_99 = p_affine_0_0 + tmp_kernel_op_3*0.59999999999999998 + tmp_kernel_op_5*0.20000000000000001;
+             const real_t tmp_kernel_op_100 = (tmp_kernel_op_99*tmp_kernel_op_99);
+             const real_t tmp_kernel_op_101 = p_affine_0_1 + tmp_kernel_op_11*0.20000000000000001 + tmp_kernel_op_9*0.59999999999999998;
+             const real_t tmp_kernel_op_102 = (tmp_kernel_op_101*tmp_kernel_op_101);
+             const real_t tmp_kernel_op_103 = tmp_kernel_op_100 + tmp_kernel_op_102;
+             const real_t tmp_kernel_op_104 = pow(tmp_kernel_op_103, -0.50000000000000000)*tmp_kernel_op_21;
+             const real_t tmp_kernel_op_105 = tmp_kernel_op_104*tmp_kernel_op_99;
+             const real_t tmp_kernel_op_106 = pow(tmp_kernel_op_103, -1.5000000000000000);
+             const real_t tmp_kernel_op_107 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_26 + tmp_kernel_op_99) - tmp_kernel_op_17*(tmp_kernel_op_101 + tmp_kernel_op_25));
+             const real_t tmp_kernel_op_108 = tmp_kernel_op_106*tmp_kernel_op_107*1.0;
+             const real_t tmp_kernel_op_109 = tmp_kernel_op_1*tmp_kernel_op_105 + tmp_kernel_op_102*tmp_kernel_op_108;
+             const real_t tmp_kernel_op_110 = tmp_kernel_op_101*tmp_kernel_op_104;
+             const real_t tmp_kernel_op_111 = tmp_kernel_op_100*tmp_kernel_op_106*tmp_kernel_op_107*1.0 - tmp_kernel_op_110*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_112 = tmp_kernel_op_101*tmp_kernel_op_106*tmp_kernel_op_107*tmp_kernel_op_99*1.0 + tmp_kernel_op_105*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_113 = tmp_kernel_op_1*tmp_kernel_op_110 - tmp_kernel_op_101*tmp_kernel_op_108*tmp_kernel_op_99;
+             const real_t tmp_kernel_op_114 = 1.0 / (tmp_kernel_op_109*tmp_kernel_op_111 + tmp_kernel_op_112*tmp_kernel_op_113);
+             const real_t tmp_kernel_op_118 = tmp_kernel_op_114*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_117 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_117);
+             const real_t tmp_kernel_op_119 = tmp_kernel_op_114*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_117 + jac_affine_inv_1_0_BLUE*tmp_kernel_op_117);
+             const real_t tmp_kernel_op_120 = tmp_kernel_op_109*tmp_kernel_op_118 + tmp_kernel_op_112*tmp_kernel_op_119;
+             const real_t tmp_kernel_op_121 = -tmp_kernel_op_113;
+             const real_t tmp_kernel_op_122 = tmp_kernel_op_111*tmp_kernel_op_119 + tmp_kernel_op_118*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_123 = tmp_kernel_op_2*0.59999999999999998 + tmp_kernel_op_4*0.20000000000000001 + tmp_kernel_op_43;
+             const real_t tmp_kernel_op_124 = (tmp_kernel_op_123*tmp_kernel_op_123);
+             const real_t tmp_kernel_op_125 = tmp_kernel_op_10*0.20000000000000001 + tmp_kernel_op_46 + tmp_kernel_op_8*0.59999999999999998;
+             const real_t tmp_kernel_op_126 = (tmp_kernel_op_125*tmp_kernel_op_125);
+             const real_t tmp_kernel_op_127 = tmp_kernel_op_124 + tmp_kernel_op_126;
+             const real_t tmp_kernel_op_128 = pow(tmp_kernel_op_127, -0.50000000000000000)*tmp_kernel_op_51;
+             const real_t tmp_kernel_op_129 = tmp_kernel_op_123*tmp_kernel_op_128;
+             const real_t tmp_kernel_op_130 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_123) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_125);
+             const real_t tmp_kernel_op_131 = pow(tmp_kernel_op_127, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_132 = tmp_kernel_op_131*(radRayVertex + tmp_kernel_op_130*tmp_kernel_op_50);
+             const real_t tmp_kernel_op_133 = tmp_kernel_op_125*tmp_kernel_op_128;
+             const real_t tmp_kernel_op_134 = tmp_kernel_op_131*(radRayVertex + tmp_kernel_op_130*tmp_kernel_op_50);
+             const real_t tmp_kernel_op_135 = tmp_kernel_op_123*tmp_kernel_op_125;
+             const real_t tmp_kernel_op_136 = abs_det_jac_affine_BLUE*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_129 - tmp_kernel_op_126*tmp_kernel_op_132)*(tmp_kernel_op_124*tmp_kernel_op_134 + tmp_kernel_op_133*tmp_kernel_op_16) - (tmp_kernel_op_0*tmp_kernel_op_133 + tmp_kernel_op_132*tmp_kernel_op_135)*(tmp_kernel_op_129*tmp_kernel_op_16 - tmp_kernel_op_134*tmp_kernel_op_135));
+             const real_t tmp_kernel_op_137 = p_affine_0_0 + tmp_kernel_op_3*0.20000000000000001 + tmp_kernel_op_5*0.20000000000000001;
+             const real_t tmp_kernel_op_138 = (tmp_kernel_op_137*tmp_kernel_op_137);
+             const real_t tmp_kernel_op_139 = p_affine_0_1 + tmp_kernel_op_11*0.20000000000000001 + tmp_kernel_op_9*0.20000000000000001;
+             const real_t tmp_kernel_op_140 = (tmp_kernel_op_139*tmp_kernel_op_139);
+             const real_t tmp_kernel_op_141 = tmp_kernel_op_138 + tmp_kernel_op_140;
+             const real_t tmp_kernel_op_142 = pow(tmp_kernel_op_141, -0.50000000000000000)*tmp_kernel_op_21;
+             const real_t tmp_kernel_op_143 = tmp_kernel_op_137*tmp_kernel_op_142;
+             const real_t tmp_kernel_op_144 = pow(tmp_kernel_op_141, -1.5000000000000000);
+             const real_t tmp_kernel_op_145 = radRayVertex + tmp_kernel_op_20*(tmp_kernel_op_1*(tmp_kernel_op_137 + tmp_kernel_op_26) - tmp_kernel_op_17*(tmp_kernel_op_139 + tmp_kernel_op_25));
+             const real_t tmp_kernel_op_146 = tmp_kernel_op_144*tmp_kernel_op_145*1.0;
+             const real_t tmp_kernel_op_147 = tmp_kernel_op_1*tmp_kernel_op_143 + tmp_kernel_op_140*tmp_kernel_op_146;
+             const real_t tmp_kernel_op_148 = tmp_kernel_op_139*tmp_kernel_op_142;
+             const real_t tmp_kernel_op_149 = tmp_kernel_op_138*tmp_kernel_op_144*tmp_kernel_op_145*1.0 - tmp_kernel_op_148*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_150 = tmp_kernel_op_137*tmp_kernel_op_139*tmp_kernel_op_144*tmp_kernel_op_145*1.0 + tmp_kernel_op_143*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_151 = tmp_kernel_op_1*tmp_kernel_op_148 - tmp_kernel_op_137*tmp_kernel_op_139*tmp_kernel_op_146;
+             const real_t tmp_kernel_op_152 = 1.0 / (tmp_kernel_op_147*tmp_kernel_op_149 + tmp_kernel_op_150*tmp_kernel_op_151);
+             const real_t tmp_kernel_op_156 = tmp_kernel_op_152*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_155 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_155);
+             const real_t tmp_kernel_op_157 = tmp_kernel_op_152*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_155 + jac_affine_inv_1_0_BLUE*tmp_kernel_op_155);
+             const real_t tmp_kernel_op_158 = tmp_kernel_op_147*tmp_kernel_op_156 + tmp_kernel_op_150*tmp_kernel_op_157;
+             const real_t tmp_kernel_op_159 = -tmp_kernel_op_151;
+             const real_t tmp_kernel_op_160 = tmp_kernel_op_149*tmp_kernel_op_157 + tmp_kernel_op_156*tmp_kernel_op_159;
+             const real_t tmp_kernel_op_161 = tmp_kernel_op_2*0.20000000000000001 + tmp_kernel_op_4*0.20000000000000001 + tmp_kernel_op_43;
+             const real_t tmp_kernel_op_162 = (tmp_kernel_op_161*tmp_kernel_op_161);
+             const real_t tmp_kernel_op_163 = tmp_kernel_op_10*0.20000000000000001 + tmp_kernel_op_46 + tmp_kernel_op_8*0.20000000000000001;
+             const real_t tmp_kernel_op_164 = (tmp_kernel_op_163*tmp_kernel_op_163);
+             const real_t tmp_kernel_op_165 = tmp_kernel_op_162 + tmp_kernel_op_164;
+             const real_t tmp_kernel_op_166 = pow(tmp_kernel_op_165, -0.50000000000000000)*tmp_kernel_op_51;
+             const real_t tmp_kernel_op_167 = tmp_kernel_op_161*tmp_kernel_op_166;
+             const real_t tmp_kernel_op_168 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_161) + tmp_kernel_op_16*(rayVertex_1 + tmp_kernel_op_163);
+             const real_t tmp_kernel_op_169 = pow(tmp_kernel_op_165, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_170 = tmp_kernel_op_169*(radRayVertex + tmp_kernel_op_168*tmp_kernel_op_50);
+             const real_t tmp_kernel_op_171 = tmp_kernel_op_163*tmp_kernel_op_166;
+             const real_t tmp_kernel_op_172 = tmp_kernel_op_169*(radRayVertex + tmp_kernel_op_168*tmp_kernel_op_50);
+             const real_t tmp_kernel_op_173 = tmp_kernel_op_161*tmp_kernel_op_163;
+             const real_t tmp_kernel_op_174 = abs_det_jac_affine_BLUE*0.26041666666666669*abs((tmp_kernel_op_0*tmp_kernel_op_167 - tmp_kernel_op_164*tmp_kernel_op_170)*(tmp_kernel_op_16*tmp_kernel_op_171 + tmp_kernel_op_162*tmp_kernel_op_172) - (tmp_kernel_op_0*tmp_kernel_op_171 + tmp_kernel_op_170*tmp_kernel_op_173)*(tmp_kernel_op_16*tmp_kernel_op_167 - tmp_kernel_op_172*tmp_kernel_op_173));
+             const real_t tmp_kernel_op_175 = tmp_kernel_op_34*(tmp_kernel_op_35 - 1.0);
+             const real_t tmp_kernel_op_176 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_175;
+             const real_t tmp_kernel_op_177 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_175;
+             const real_t tmp_kernel_op_178 = tmp_kernel_op_176*tmp_kernel_op_29 + tmp_kernel_op_177*tmp_kernel_op_32;
+             const real_t tmp_kernel_op_179 = tmp_kernel_op_176*tmp_kernel_op_41 + tmp_kernel_op_177*tmp_kernel_op_31;
+             const real_t tmp_kernel_op_180 = tmp_kernel_op_76*(tmp_kernel_op_77 - 1.0);
+             const real_t tmp_kernel_op_181 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_180;
+             const real_t tmp_kernel_op_182 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_180;
+             const real_t tmp_kernel_op_183 = tmp_kernel_op_181*tmp_kernel_op_71 + tmp_kernel_op_182*tmp_kernel_op_74;
+             const real_t tmp_kernel_op_184 = tmp_kernel_op_181*tmp_kernel_op_83 + tmp_kernel_op_182*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_185 = tmp_kernel_op_114*(tmp_kernel_op_115 - 1.0);
+             const real_t tmp_kernel_op_186 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_185;
+             const real_t tmp_kernel_op_187 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_185;
+             const real_t tmp_kernel_op_188 = tmp_kernel_op_109*tmp_kernel_op_186 + tmp_kernel_op_112*tmp_kernel_op_187;
+             const real_t tmp_kernel_op_189 = tmp_kernel_op_111*tmp_kernel_op_187 + tmp_kernel_op_121*tmp_kernel_op_186;
+             const real_t tmp_kernel_op_190 = tmp_kernel_op_152*(tmp_kernel_op_153 - 1.0);
+             const real_t tmp_kernel_op_191 = jac_affine_inv_0_1_BLUE*tmp_kernel_op_190;
+             const real_t tmp_kernel_op_192 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_190;
+             const real_t tmp_kernel_op_193 = tmp_kernel_op_147*tmp_kernel_op_191 + tmp_kernel_op_150*tmp_kernel_op_192;
+             const real_t tmp_kernel_op_194 = tmp_kernel_op_149*tmp_kernel_op_192 + tmp_kernel_op_159*tmp_kernel_op_191;
+             const real_t tmp_kernel_op_195 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_188 + tmp_kernel_op_122*tmp_kernel_op_189) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_193 + tmp_kernel_op_160*tmp_kernel_op_194) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_40 + tmp_kernel_op_179*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_82 + tmp_kernel_op_184*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_196 = tmp_kernel_op_34*(tmp_kernel_op_36 - 1.0);
+             const real_t tmp_kernel_op_197 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_196;
+             const real_t tmp_kernel_op_198 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_196;
+             const real_t tmp_kernel_op_199 = tmp_kernel_op_197*tmp_kernel_op_29 + tmp_kernel_op_198*tmp_kernel_op_32;
+             const real_t tmp_kernel_op_200 = tmp_kernel_op_197*tmp_kernel_op_41 + tmp_kernel_op_198*tmp_kernel_op_31;
+             const real_t tmp_kernel_op_201 = tmp_kernel_op_76*(tmp_kernel_op_78 - 1.0);
+             const real_t tmp_kernel_op_202 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_201;
+             const real_t tmp_kernel_op_203 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_201;
+             const real_t tmp_kernel_op_204 = tmp_kernel_op_202*tmp_kernel_op_71 + tmp_kernel_op_203*tmp_kernel_op_74;
+             const real_t tmp_kernel_op_205 = tmp_kernel_op_202*tmp_kernel_op_83 + tmp_kernel_op_203*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_206 = tmp_kernel_op_114*(tmp_kernel_op_116 - 1.0);
+             const real_t tmp_kernel_op_207 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_206;
+             const real_t tmp_kernel_op_208 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_206;
+             const real_t tmp_kernel_op_209 = tmp_kernel_op_109*tmp_kernel_op_207 + tmp_kernel_op_112*tmp_kernel_op_208;
+             const real_t tmp_kernel_op_210 = tmp_kernel_op_111*tmp_kernel_op_208 + tmp_kernel_op_121*tmp_kernel_op_207;
+             const real_t tmp_kernel_op_211 = tmp_kernel_op_152*(tmp_kernel_op_154 - 1.0);
+             const real_t tmp_kernel_op_212 = jac_affine_inv_1_1_BLUE*tmp_kernel_op_211;
+             const real_t tmp_kernel_op_213 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_211;
+             const real_t tmp_kernel_op_214 = tmp_kernel_op_147*tmp_kernel_op_212 + tmp_kernel_op_150*tmp_kernel_op_213;
+             const real_t tmp_kernel_op_215 = tmp_kernel_op_149*tmp_kernel_op_213 + tmp_kernel_op_159*tmp_kernel_op_212;
+             const real_t tmp_kernel_op_216 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_209 + tmp_kernel_op_122*tmp_kernel_op_210) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_214 + tmp_kernel_op_160*tmp_kernel_op_215) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_40 + tmp_kernel_op_200*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_82 + tmp_kernel_op_205*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_219 = tmp_kernel_op_34*(tmp_moved_constant_0 + tmp_moved_constant_1);
+             const real_t tmp_kernel_op_222 = tmp_kernel_op_34*(tmp_moved_constant_2 + tmp_moved_constant_3);
+             const real_t tmp_kernel_op_223 = tmp_kernel_op_219*tmp_kernel_op_29 + tmp_kernel_op_222*tmp_kernel_op_32;
+             const real_t tmp_kernel_op_224 = tmp_kernel_op_219*tmp_kernel_op_41 + tmp_kernel_op_222*tmp_kernel_op_31;
+             const real_t tmp_kernel_op_227 = tmp_kernel_op_76*(tmp_moved_constant_4 + tmp_moved_constant_5);
+             const real_t tmp_kernel_op_230 = tmp_kernel_op_76*(tmp_moved_constant_6 + tmp_moved_constant_7);
+             const real_t tmp_kernel_op_231 = tmp_kernel_op_227*tmp_kernel_op_71 + tmp_kernel_op_230*tmp_kernel_op_74;
+             const real_t tmp_kernel_op_232 = tmp_kernel_op_227*tmp_kernel_op_83 + tmp_kernel_op_230*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_235 = tmp_kernel_op_114*(tmp_moved_constant_8 + tmp_moved_constant_9);
+             const real_t tmp_kernel_op_238 = tmp_kernel_op_114*(tmp_moved_constant_10 + tmp_moved_constant_11);
+             const real_t tmp_kernel_op_239 = tmp_kernel_op_109*tmp_kernel_op_235 + tmp_kernel_op_112*tmp_kernel_op_238;
+             const real_t tmp_kernel_op_240 = tmp_kernel_op_111*tmp_kernel_op_238 + tmp_kernel_op_121*tmp_kernel_op_235;
+             const real_t tmp_kernel_op_243 = tmp_kernel_op_152*(tmp_moved_constant_12 + tmp_moved_constant_13);
+             const real_t tmp_kernel_op_246 = tmp_kernel_op_152*(tmp_moved_constant_14 + tmp_moved_constant_15);
+             const real_t tmp_kernel_op_247 = tmp_kernel_op_147*tmp_kernel_op_243 + tmp_kernel_op_150*tmp_kernel_op_246;
+             const real_t tmp_kernel_op_248 = tmp_kernel_op_149*tmp_kernel_op_246 + tmp_kernel_op_159*tmp_kernel_op_243;
+             const real_t tmp_kernel_op_249 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_239 + tmp_kernel_op_122*tmp_kernel_op_240) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_247 + tmp_kernel_op_160*tmp_kernel_op_248) + tmp_kernel_op_60*(tmp_kernel_op_223*tmp_kernel_op_40 + tmp_kernel_op_224*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_231*tmp_kernel_op_82 + tmp_kernel_op_232*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_251 = tmp_kernel_op_34*(jac_affine_inv_1_1_BLUE*tmp_kernel_op_250 - tmp_moved_constant_0);
+             const real_t tmp_kernel_op_252 = tmp_kernel_op_34*(jac_affine_inv_1_0_BLUE*tmp_kernel_op_250 - tmp_moved_constant_2);
+             const real_t tmp_kernel_op_253 = tmp_kernel_op_251*tmp_kernel_op_29 + tmp_kernel_op_252*tmp_kernel_op_32;
+             const real_t tmp_kernel_op_254 = tmp_kernel_op_251*tmp_kernel_op_41 + tmp_kernel_op_252*tmp_kernel_op_31;
+             const real_t tmp_kernel_op_256 = tmp_kernel_op_76*(jac_affine_inv_1_1_BLUE*tmp_kernel_op_255 - tmp_moved_constant_4);
+             const real_t tmp_kernel_op_257 = tmp_kernel_op_76*(jac_affine_inv_1_0_BLUE*tmp_kernel_op_255 - tmp_moved_constant_6);
+             const real_t tmp_kernel_op_258 = tmp_kernel_op_256*tmp_kernel_op_71 + tmp_kernel_op_257*tmp_kernel_op_74;
+             const real_t tmp_kernel_op_259 = tmp_kernel_op_256*tmp_kernel_op_83 + tmp_kernel_op_257*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_261 = tmp_kernel_op_114*(jac_affine_inv_1_1_BLUE*tmp_kernel_op_260 - tmp_moved_constant_8);
+             const real_t tmp_kernel_op_262 = tmp_kernel_op_114*(jac_affine_inv_1_0_BLUE*tmp_kernel_op_260 - tmp_moved_constant_10);
+             const real_t tmp_kernel_op_263 = tmp_kernel_op_109*tmp_kernel_op_261 + tmp_kernel_op_112*tmp_kernel_op_262;
+             const real_t tmp_kernel_op_264 = tmp_kernel_op_111*tmp_kernel_op_262 + tmp_kernel_op_121*tmp_kernel_op_261;
+             const real_t tmp_kernel_op_266 = tmp_kernel_op_152*(jac_affine_inv_1_1_BLUE*tmp_kernel_op_265 - tmp_moved_constant_12);
+             const real_t tmp_kernel_op_267 = tmp_kernel_op_152*(jac_affine_inv_1_0_BLUE*tmp_kernel_op_265 - tmp_moved_constant_14);
+             const real_t tmp_kernel_op_268 = tmp_kernel_op_147*tmp_kernel_op_266 + tmp_kernel_op_150*tmp_kernel_op_267;
+             const real_t tmp_kernel_op_269 = tmp_kernel_op_149*tmp_kernel_op_267 + tmp_kernel_op_159*tmp_kernel_op_266;
+             const real_t tmp_kernel_op_270 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_263 + tmp_kernel_op_122*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_268 + tmp_kernel_op_160*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_253*tmp_kernel_op_40 + tmp_kernel_op_254*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_258*tmp_kernel_op_82 + tmp_kernel_op_259*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_272 = tmp_kernel_op_34*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_271 - tmp_moved_constant_1);
+             const real_t tmp_kernel_op_273 = tmp_kernel_op_34*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_271 - tmp_moved_constant_3);
+             const real_t tmp_kernel_op_274 = tmp_kernel_op_272*tmp_kernel_op_29 + tmp_kernel_op_273*tmp_kernel_op_32;
+             const real_t tmp_kernel_op_275 = tmp_kernel_op_272*tmp_kernel_op_41 + tmp_kernel_op_273*tmp_kernel_op_31;
+             const real_t tmp_kernel_op_277 = tmp_kernel_op_76*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_276 - tmp_moved_constant_5);
+             const real_t tmp_kernel_op_278 = tmp_kernel_op_76*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_276 - tmp_moved_constant_7);
+             const real_t tmp_kernel_op_279 = tmp_kernel_op_277*tmp_kernel_op_71 + tmp_kernel_op_278*tmp_kernel_op_74;
+             const real_t tmp_kernel_op_280 = tmp_kernel_op_277*tmp_kernel_op_83 + tmp_kernel_op_278*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_282 = tmp_kernel_op_114*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_281 - tmp_moved_constant_9);
+             const real_t tmp_kernel_op_283 = tmp_kernel_op_114*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_281 - tmp_moved_constant_11);
+             const real_t tmp_kernel_op_284 = tmp_kernel_op_109*tmp_kernel_op_282 + tmp_kernel_op_112*tmp_kernel_op_283;
+             const real_t tmp_kernel_op_285 = tmp_kernel_op_111*tmp_kernel_op_283 + tmp_kernel_op_121*tmp_kernel_op_282;
+             const real_t tmp_kernel_op_287 = tmp_kernel_op_152*(jac_affine_inv_0_1_BLUE*tmp_kernel_op_286 - tmp_moved_constant_13);
+             const real_t tmp_kernel_op_288 = tmp_kernel_op_152*(jac_affine_inv_0_0_BLUE*tmp_kernel_op_286 - tmp_moved_constant_15);
+             const real_t tmp_kernel_op_289 = tmp_kernel_op_147*tmp_kernel_op_287 + tmp_kernel_op_150*tmp_kernel_op_288;
+             const real_t tmp_kernel_op_290 = tmp_kernel_op_149*tmp_kernel_op_288 + tmp_kernel_op_159*tmp_kernel_op_287;
+             const real_t tmp_kernel_op_291 = tmp_kernel_op_136*(tmp_kernel_op_120*tmp_kernel_op_284 + tmp_kernel_op_122*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_158*tmp_kernel_op_289 + tmp_kernel_op_160*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_274*tmp_kernel_op_40 + tmp_kernel_op_275*tmp_kernel_op_42) + tmp_kernel_op_98*(tmp_kernel_op_279*tmp_kernel_op_82 + tmp_kernel_op_280*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_292 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_209 + tmp_kernel_op_189*tmp_kernel_op_210) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_214 + tmp_kernel_op_194*tmp_kernel_op_215) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_199 + tmp_kernel_op_179*tmp_kernel_op_200) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_204 + tmp_kernel_op_184*tmp_kernel_op_205);
+             const real_t tmp_kernel_op_293 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_239 + tmp_kernel_op_189*tmp_kernel_op_240) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_247 + tmp_kernel_op_194*tmp_kernel_op_248) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_223 + tmp_kernel_op_179*tmp_kernel_op_224) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_231 + tmp_kernel_op_184*tmp_kernel_op_232);
+             const real_t tmp_kernel_op_294 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_263 + tmp_kernel_op_189*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_268 + tmp_kernel_op_194*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_253 + tmp_kernel_op_179*tmp_kernel_op_254) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_258 + tmp_kernel_op_184*tmp_kernel_op_259);
+             const real_t tmp_kernel_op_295 = tmp_kernel_op_136*(tmp_kernel_op_188*tmp_kernel_op_284 + tmp_kernel_op_189*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_193*tmp_kernel_op_289 + tmp_kernel_op_194*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_178*tmp_kernel_op_274 + tmp_kernel_op_179*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_183*tmp_kernel_op_279 + tmp_kernel_op_184*tmp_kernel_op_280);
+             const real_t tmp_kernel_op_296 = tmp_kernel_op_136*(tmp_kernel_op_209*tmp_kernel_op_239 + tmp_kernel_op_210*tmp_kernel_op_240) + tmp_kernel_op_174*(tmp_kernel_op_214*tmp_kernel_op_247 + tmp_kernel_op_215*tmp_kernel_op_248) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_223 + tmp_kernel_op_200*tmp_kernel_op_224) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_231 + tmp_kernel_op_205*tmp_kernel_op_232);
+             const real_t tmp_kernel_op_297 = tmp_kernel_op_136*(tmp_kernel_op_209*tmp_kernel_op_263 + tmp_kernel_op_210*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_214*tmp_kernel_op_268 + tmp_kernel_op_215*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_253 + tmp_kernel_op_200*tmp_kernel_op_254) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_258 + tmp_kernel_op_205*tmp_kernel_op_259);
+             const real_t tmp_kernel_op_298 = tmp_kernel_op_136*(tmp_kernel_op_209*tmp_kernel_op_284 + tmp_kernel_op_210*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_214*tmp_kernel_op_289 + tmp_kernel_op_215*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_199*tmp_kernel_op_274 + tmp_kernel_op_200*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_204*tmp_kernel_op_279 + tmp_kernel_op_205*tmp_kernel_op_280);
+             const real_t tmp_kernel_op_299 = tmp_kernel_op_136*(tmp_kernel_op_239*tmp_kernel_op_263 + tmp_kernel_op_240*tmp_kernel_op_264) + tmp_kernel_op_174*(tmp_kernel_op_247*tmp_kernel_op_268 + tmp_kernel_op_248*tmp_kernel_op_269) + tmp_kernel_op_60*(tmp_kernel_op_223*tmp_kernel_op_253 + tmp_kernel_op_224*tmp_kernel_op_254) + tmp_kernel_op_98*(tmp_kernel_op_231*tmp_kernel_op_258 + tmp_kernel_op_232*tmp_kernel_op_259);
+             const real_t tmp_kernel_op_300 = tmp_kernel_op_136*(tmp_kernel_op_239*tmp_kernel_op_284 + tmp_kernel_op_240*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_247*tmp_kernel_op_289 + tmp_kernel_op_248*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_223*tmp_kernel_op_274 + tmp_kernel_op_224*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_231*tmp_kernel_op_279 + tmp_kernel_op_232*tmp_kernel_op_280);
+             const real_t tmp_kernel_op_301 = tmp_kernel_op_136*(tmp_kernel_op_263*tmp_kernel_op_284 + tmp_kernel_op_264*tmp_kernel_op_285) + tmp_kernel_op_174*(tmp_kernel_op_268*tmp_kernel_op_289 + tmp_kernel_op_269*tmp_kernel_op_290) + tmp_kernel_op_60*(tmp_kernel_op_253*tmp_kernel_op_274 + tmp_kernel_op_254*tmp_kernel_op_275) + tmp_kernel_op_98*(tmp_kernel_op_258*tmp_kernel_op_279 + tmp_kernel_op_259*tmp_kernel_op_280);
+             const real_t elMat_0_0 = tmp_kernel_op_136*((tmp_kernel_op_120*tmp_kernel_op_120) + (tmp_kernel_op_122*tmp_kernel_op_122)) + tmp_kernel_op_174*((tmp_kernel_op_158*tmp_kernel_op_158) + (tmp_kernel_op_160*tmp_kernel_op_160)) + tmp_kernel_op_60*((tmp_kernel_op_40*tmp_kernel_op_40) + (tmp_kernel_op_42*tmp_kernel_op_42)) + tmp_kernel_op_98*((tmp_kernel_op_82*tmp_kernel_op_82) + (tmp_kernel_op_84*tmp_kernel_op_84));
+             const real_t elMat_0_1 = tmp_kernel_op_195;
+             const real_t elMat_0_2 = tmp_kernel_op_216;
+             const real_t elMat_0_3 = tmp_kernel_op_249;
+             const real_t elMat_0_4 = tmp_kernel_op_270;
+             const real_t elMat_0_5 = tmp_kernel_op_291;
+             const real_t elMat_1_0 = tmp_kernel_op_195;
+             const real_t elMat_1_1 = tmp_kernel_op_136*((tmp_kernel_op_188*tmp_kernel_op_188) + (tmp_kernel_op_189*tmp_kernel_op_189)) + tmp_kernel_op_174*((tmp_kernel_op_193*tmp_kernel_op_193) + (tmp_kernel_op_194*tmp_kernel_op_194)) + tmp_kernel_op_60*((tmp_kernel_op_178*tmp_kernel_op_178) + (tmp_kernel_op_179*tmp_kernel_op_179)) + tmp_kernel_op_98*((tmp_kernel_op_183*tmp_kernel_op_183) + (tmp_kernel_op_184*tmp_kernel_op_184));
+             const real_t elMat_1_2 = tmp_kernel_op_292;
+             const real_t elMat_1_3 = tmp_kernel_op_293;
+             const real_t elMat_1_4 = tmp_kernel_op_294;
+             const real_t elMat_1_5 = tmp_kernel_op_295;
+             const real_t elMat_2_0 = tmp_kernel_op_216;
+             const real_t elMat_2_1 = tmp_kernel_op_292;
+             const real_t elMat_2_2 = tmp_kernel_op_136*((tmp_kernel_op_209*tmp_kernel_op_209) + (tmp_kernel_op_210*tmp_kernel_op_210)) + tmp_kernel_op_174*((tmp_kernel_op_214*tmp_kernel_op_214) + (tmp_kernel_op_215*tmp_kernel_op_215)) + tmp_kernel_op_60*((tmp_kernel_op_199*tmp_kernel_op_199) + (tmp_kernel_op_200*tmp_kernel_op_200)) + tmp_kernel_op_98*((tmp_kernel_op_204*tmp_kernel_op_204) + (tmp_kernel_op_205*tmp_kernel_op_205));
+             const real_t elMat_2_3 = tmp_kernel_op_296;
+             const real_t elMat_2_4 = tmp_kernel_op_297;
+             const real_t elMat_2_5 = tmp_kernel_op_298;
+             const real_t elMat_3_0 = tmp_kernel_op_249;
+             const real_t elMat_3_1 = tmp_kernel_op_293;
+             const real_t elMat_3_2 = tmp_kernel_op_296;
+             const real_t elMat_3_3 = tmp_kernel_op_136*((tmp_kernel_op_239*tmp_kernel_op_239) + (tmp_kernel_op_240*tmp_kernel_op_240)) + tmp_kernel_op_174*((tmp_kernel_op_247*tmp_kernel_op_247) + (tmp_kernel_op_248*tmp_kernel_op_248)) + tmp_kernel_op_60*((tmp_kernel_op_223*tmp_kernel_op_223) + (tmp_kernel_op_224*tmp_kernel_op_224)) + tmp_kernel_op_98*((tmp_kernel_op_231*tmp_kernel_op_231) + (tmp_kernel_op_232*tmp_kernel_op_232));
+             const real_t elMat_3_4 = tmp_kernel_op_299;
+             const real_t elMat_3_5 = tmp_kernel_op_300;
+             const real_t elMat_4_0 = tmp_kernel_op_270;
+             const real_t elMat_4_1 = tmp_kernel_op_294;
+             const real_t elMat_4_2 = tmp_kernel_op_297;
+             const real_t elMat_4_3 = tmp_kernel_op_299;
+             const real_t elMat_4_4 = tmp_kernel_op_136*((tmp_kernel_op_263*tmp_kernel_op_263) + (tmp_kernel_op_264*tmp_kernel_op_264)) + tmp_kernel_op_174*((tmp_kernel_op_268*tmp_kernel_op_268) + (tmp_kernel_op_269*tmp_kernel_op_269)) + tmp_kernel_op_60*((tmp_kernel_op_253*tmp_kernel_op_253) + (tmp_kernel_op_254*tmp_kernel_op_254)) + tmp_kernel_op_98*((tmp_kernel_op_258*tmp_kernel_op_258) + (tmp_kernel_op_259*tmp_kernel_op_259));
+             const real_t elMat_4_5 = tmp_kernel_op_301;
+             const real_t elMat_5_0 = tmp_kernel_op_291;
+             const real_t elMat_5_1 = tmp_kernel_op_295;
+             const real_t elMat_5_2 = tmp_kernel_op_298;
+             const real_t elMat_5_3 = tmp_kernel_op_300;
+             const real_t elMat_5_4 = tmp_kernel_op_301;
+             const real_t elMat_5_5 = tmp_kernel_op_136*((tmp_kernel_op_284*tmp_kernel_op_284) + (tmp_kernel_op_285*tmp_kernel_op_285)) + tmp_kernel_op_174*((tmp_kernel_op_289*tmp_kernel_op_289) + (tmp_kernel_op_290*tmp_kernel_op_290)) + tmp_kernel_op_60*((tmp_kernel_op_274*tmp_kernel_op_274) + (tmp_kernel_op_275*tmp_kernel_op_275)) + tmp_kernel_op_98*((tmp_kernel_op_279*tmp_kernel_op_279) + (tmp_kernel_op_280*tmp_kernel_op_280));
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/divergence/CMakeLists.txt b/operators/divergence/CMakeLists.txt
index 6cd8762ee8f8f310b35afaf271072bd1d801d8b2..a3729803e8768c36970de4d38330fbb416bda1d9 100644
--- a/operators/divergence/CMakeLists.txt
+++ b/operators/divergence/CMakeLists.txt
@@ -1,5 +1,9 @@
 add_library( opgen-divergence
 
+   P2ToP1ElementwiseDivergenceAnnulusMap_0_0.cpp
+   P2ToP1ElementwiseDivergenceAnnulusMap_0_0.hpp
+   P2ToP1ElementwiseDivergenceAnnulusMap_0_1.cpp
+   P2ToP1ElementwiseDivergenceAnnulusMap_0_1.hpp
    P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.cpp
    P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.hpp
    P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.cpp
@@ -17,6 +21,8 @@ add_library( opgen-divergence
 if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
    target_sources(opgen-divergence PRIVATE
 
+      avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp
+      avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp
       avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp
       avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp
       avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp
@@ -25,6 +31,8 @@ if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
       avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp
       avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp
       avx/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp
+      noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_macro_2D.cpp
+      noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_macro_2D.cpp
       noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
       noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp
       noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp
@@ -37,6 +45,8 @@ if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
 
    set_source_files_properties(
 
+      avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp
+      avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp
       avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp
       avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp
       avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp
@@ -55,6 +65,10 @@ else()
 
    target_sources(opgen-divergence PRIVATE
 
+      noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp
+      noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_macro_2D.cpp
+      noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp
+      noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_macro_2D.cpp
       noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp
       noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
       noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp
diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.cpp b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3507a2b2a8c749735a26b26eae485d94e6493c4d
--- /dev/null
+++ b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.cpp
@@ -0,0 +1,261 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (possibly
+// related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) that causes a
+// warning in an internal standard library header (bits/stl_algobase.h). As a
+// workaround, we disable the warning and include this header indirectly through
+// a public header.
+#include <waLBerlaDefinitions.h>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnonnull"
+#endif
+#include <cmath>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#include "P2ToP1ElementwiseDivergenceAnnulusMap_0_0.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+P2ToP1ElementwiseDivergenceAnnulusMap_0_0::P2ToP1ElementwiseDivergenceAnnulusMap_0_0(
+    const std::shared_ptr< PrimitiveStorage >& storage, size_t minLevel, size_t maxLevel )
+: Operator( storage, minLevel, maxLevel )
+{
+   // Intentionally empty: the three arguments are only forwarded to the Operator base class.
+}
+
+void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::apply( const P2Function< real_t >& src, // input function; its vertex and edge DoF data are read
+                                                       const P1Function< real_t >& dst, // output function; its data is passed to the kernel as _data_dst
+                                                       uint_t                      level, // level whose data arrays are used
+                                                       DoFType                     flag, // DoFs zeroed on Replace; All ^ flag is passed to the additive communication
+                                                       UpdateType                  updateType ) const // Replace additionally zeroes dst for the given flag first
+{
+   this->startTiming( "apply" );
+   // Overview: sync src, optionally zero dst, run apply_macro_2D on every macro-face, then communicate dst additively.
+   // Make sure that the halos of src are up-to-date (storages with macro-cells hit WALBERLA_ABORT).
+   this->timingTree_->start( "pre-communication" );
+   if ( this->storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH );
+   }
+   this->timingTree_->stop( "pre-communication" );
+
+   if ( updateType == Replace )
+   {
+      // We need to zero the destination array (including halos).
+      // However, we must not zero out anything that is not flagged with the specified BCs.
+      // Therefore, we first zero out everything that is flagged, and then, later,
+      // the halos of the highest dim primitives.
+      dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data in the functions
+         real_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dst       = face.getData( dst.getFaceDataID() )->getPointer( level );
+
+         // Zero out dst halos only (the entries for which isVertexOnBoundary() holds)
+         //
+         // This is also necessary when using update type == Add.
+         // During additive comm we then skip zeroing the data on the lower-dim primitives.
+         for ( const auto& idx : vertexdof::macroface::Iterator( level ) )
+         {
+            if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) )
+            {
+               auto arrayIdx       = vertexdof::macroface::index( level, idx.x(), idx.y() );
+               _data_dst[arrayIdx] = real_t( 0 );
+            }
+         }
+         // Collect the kernel arguments: micro-edge count, macro-face vertex coordinates and AnnulusMap parameters.
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         WALBERLA_CHECK_NOT_NULLPTR(
+             std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ),
+             "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex();
+         real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex();
+         real_t refVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0];
+         real_t rayVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0];
+         real_t thrVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0];
+         real_t refVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1];
+         real_t rayVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1];
+         real_t thrVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1];
+         // NOTE(review): the AnnulusMap cast above is repeated nine times per face and the check message mentions macro-cell although a macro-face is checked; fix in the generator.
+         this->timingTree_->start( "kernel" );
+
+         apply_macro_2D(
+
+             _data_dst,
+             _data_srcEdge,
+             _data_srcVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+
+      // Push result to lower-dimensional primitives
+      // (additive communication from faces to edges and from faces to vertices).
+      this->timingTree_->start( "post-communication" );
+      // Note: We could avoid communication here by implementing the apply() also for the respective
+      //       lower dimensional primitives!
+      dst.communicateAdditively< Face, Edge >( level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.communicateAdditively< Face, Vertex >( level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      this->timingTree_->stop( "post-communication" );
+   }
+
+   this->stopTiming( "apply" );
+}
+void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat, // matrix proxy handed on to the kernel
+                                                          const P2Function< idx_t >&                  src, // idx_t function; vertex and edge DoF data are read
+                                                          const P1Function< idx_t >&                  dst, // idx_t function; its data is passed as _data_dst
+                                                          uint_t                                      level, // level whose data arrays are used
+                                                          DoFType                                     flag ) const // ignored; a warning is logged if it is not All
+{
+   this->startTiming( "toMatrix" );
+   // Overview: for every macro-face, toMatrix_macro_2D receives the src/dst index data, the geometry parameters and mat.
+   // We currently ignore the flag provided!
+   if ( flag != All )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      this->timingTree_->start( "pre-communication" );
+      // no communication is performed in this timer section
+      this->timingTree_->stop( "pre-communication" );
+
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      this->timingTree_->start( "pre-communication" );
+      // no communication is performed in this timer section
+      this->timingTree_->stop( "pre-communication" );
+
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the index data (idx_t) that src and dst store on this face
+         idx_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t* _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t* _data_dst       = face.getData( dst.getFaceDataID() )->getPointer( level );
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         WALBERLA_CHECK_NOT_NULLPTR(
+             std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ),
+             "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex();
+         real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex();
+         real_t refVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0];
+         real_t rayVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0];
+         real_t thrVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0];
+         real_t refVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1];
+         real_t rayVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1];
+         real_t thrVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1];
+         // NOTE(review): the AnnulusMap cast above is repeated nine times per face and the check message mentions macro-cell although a macro-face is checked; fix in the generator.
+         this->timingTree_->start( "kernel" );
+
+         toMatrix_macro_2D(
+
+             _data_dst,
+             _data_srcEdge,
+             _data_srcVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             mat,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+   }
+   this->stopTiming( "toMatrix" );
+}
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.hpp b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..88b436f4420e20aa9cf3ca28afa690196b8a1d7a
--- /dev/null
+++ b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.hpp
@@ -0,0 +1,135 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+#include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/communication/Syncing.hpp"
+#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
+#include "hyteg/geometry/AnnulusMap.hpp"
+#include "hyteg/operators/Operator.hpp"
+#include "hyteg/p1functionspace/P1Function.hpp"
+#include "hyteg/p2functionspace/P2Function.hpp"
+#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
+#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+/// Divergence.
+///
+/// Component:    0
+/// Geometry map: AnnulusMap
+///
+/// Weak formulation
+///
+///     u: trial function (vectorial space: Lagrange, degree: 2)
+///     v: test function  (scalar space:    Lagrange, degree: 1)
+///
+///     ∫ - ( ∇ · u ) v
+
+class P2ToP1ElementwiseDivergenceAnnulusMap_0_0 : public Operator< P2Function< real_t >, P1Function< real_t > >
+{
+ public:
+   P2ToP1ElementwiseDivergenceAnnulusMap_0_0( const std::shared_ptr< PrimitiveStorage >& storage,
+                                              size_t                                     minLevel,
+                                              size_t                                     maxLevel ); // all three arguments are forwarded to the Operator base class
+   /// Runs apply_macro_2D on every macro-face of the storage; with Replace, dst is zeroed for the given flag first.
+   void apply( const P2Function< real_t >& src,
+               const P1Function< real_t >& dst,
+               uint_t                      level,
+               DoFType                     flag,
+               UpdateType                  updateType = Replace ) const;
+   /// Passes the src/dst index data and mat to toMatrix_macro_2D for every macro-face; flag is ignored (a warning is logged if flag != All).
+   void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                  const P2Function< idx_t >&                  src,
+                  const P1Function< idx_t >&                  dst,
+                  uint_t                                      level,
+                  DoFType                                     flag ) const;
+
+ protected:
+ private:
+   /// Kernel type: apply
+   /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    220     318      17      12      3              0                 0              1
+   void apply_macro_2D( real_t* RESTRICT _data_dst,
+                        real_t* RESTRICT _data_srcEdge,
+                        real_t* RESTRICT _data_srcVertex,
+                        real_t           macro_vertex_coord_id_0comp0,
+                        real_t           macro_vertex_coord_id_0comp1,
+                        real_t           macro_vertex_coord_id_1comp0,
+                        real_t           macro_vertex_coord_id_1comp1,
+                        real_t           macro_vertex_coord_id_2comp0,
+                        real_t           macro_vertex_coord_id_2comp1,
+                        int64_t          micro_edges_per_macro_edge,
+                        real_t           micro_edges_per_macro_edge_float,
+                        real_t           radRayVertex,
+                        real_t           radRefVertex,
+                        real_t           rayVertex_0,
+                        real_t           rayVertex_1,
+                        real_t           refVertex_0,
+                        real_t           refVertex_1,
+                        real_t           thrVertex_0,
+                        real_t           thrVertex_1 ) const;
+   /// Kernel type: toMatrix
+   /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    202     300      17      12      3              0                 0              4
+   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dst,
+                           idx_t* RESTRICT                      _data_srcEdge,
+                           idx_t* RESTRICT                      _data_srcVertex,
+                           real_t                               macro_vertex_coord_id_0comp0,
+                           real_t                               macro_vertex_coord_id_0comp1,
+                           real_t                               macro_vertex_coord_id_1comp0,
+                           real_t                               macro_vertex_coord_id_1comp1,
+                           real_t                               macro_vertex_coord_id_2comp0,
+                           real_t                               macro_vertex_coord_id_2comp1,
+                           std::shared_ptr< SparseMatrixProxy > mat,
+                           int64_t                              micro_edges_per_macro_edge,
+                           real_t                               micro_edges_per_macro_edge_float,
+                           real_t                               radRayVertex,
+                           real_t                               radRefVertex,
+                           real_t                               rayVertex_0,
+                           real_t                               rayVertex_1,
+                           real_t                               refVertex_0,
+                           real_t                               refVertex_1,
+                           real_t                               thrVertex_0,
+                           real_t                               thrVertex_1 ) const;
+};
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.cpp b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..178e4cb105e8942931a9bce178bbbb4b7baeebe3
--- /dev/null
+++ b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.cpp
@@ -0,0 +1,261 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (possibly
+// related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) that causes a
+// warning in an internal standard library header (bits/stl_algobase.h). As a
+// workaround, we disable the warning and include this header indirectly through
+// a public header.
+#include <waLBerlaDefinitions.h>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnonnull"
+#endif
+#include <cmath>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#include "P2ToP1ElementwiseDivergenceAnnulusMap_0_1.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+P2ToP1ElementwiseDivergenceAnnulusMap_0_1::P2ToP1ElementwiseDivergenceAnnulusMap_0_1(
+    const std::shared_ptr< PrimitiveStorage >& storage, size_t minLevel, size_t maxLevel )
+: Operator( storage, minLevel, maxLevel )
+{
+   // Intentionally empty: the three arguments are only forwarded to the Operator base class.
+}
+
+// Runs apply_macro_2D on every macro-face of the storage and additively communicates the result to
+// the macro-edges and macro-vertices. For updateType == Replace, the dst entries selected by flag are
+// zeroed beforehand. Aborts with "Not implemented." if the storage has global cells.
+void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::apply( const P2Function< real_t >& src,
+                                                       const P1Function< real_t >& dst,
+                                                       uint_t                      level,
+                                                       DoFType                     flag,
+                                                       UpdateType                  updateType ) const
+{
+   this->startTiming( "apply" );
+
+   // Make sure that halos are up-to-date
+   this->timingTree_->start( "pre-communication" );
+   if ( this->storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH );
+   }
+   this->timingTree_->stop( "pre-communication" );
+
+   if ( updateType == Replace )
+   {
+      // We need to zero the destination array (including halos), but must not zero out anything
+      // that is not flagged with the specified BCs. Therefore, we first zero out everything that
+      // is flagged, and then, later, the halos of the highest dim primitives.
+      dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data in the functions
+         real_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dst       = face.getData( dst.getFaceDataID() )->getPointer( level );
+
+         // Zero out dst halos only
+         //
+         // This is also necessary when using update type == Add.
+         // During additive comm we then skip zeroing the data on the lower-dim primitives.
+         for ( const auto& idx : vertexdof::macroface::Iterator( level ) )
+         {
+            if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) )
+            {
+               auto arrayIdx       = vertexdof::macroface::index( level, idx.x(), idx.y() );
+               _data_dst[arrayIdx] = real_t( 0 );
+            }
+         }
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         // Resolve the AnnulusMap once per face; the checked pointer is reused for all accessors below.
+         const auto annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+         WALBERLA_CHECK_NOT_NULLPTR(
+             annulusMap, "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-face." )
+         const real_t radRefVertex = annulusMap->radRefVertex();
+         const real_t radRayVertex = annulusMap->radRayVertex();
+         const real_t refVertex_0  = annulusMap->refVertex()[0];
+         const real_t rayVertex_0  = annulusMap->rayVertex()[0];
+         const real_t thrVertex_0  = annulusMap->thrVertex()[0];
+         const real_t refVertex_1  = annulusMap->refVertex()[1];
+         const real_t rayVertex_1  = annulusMap->rayVertex()[1];
+         const real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+         apply_macro_2D(
+             _data_dst,
+             _data_srcEdge,
+             _data_srcVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+
+      // Push result to lower-dimensional primitives
+      this->timingTree_->start( "post-communication" );
+      // Note: We could avoid communication here by implementing the apply() also for the respective
+      //       lower dimensional primitives!
+      dst.communicateAdditively< Face, Edge >( level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.communicateAdditively< Face, Vertex >( level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      this->timingTree_->stop( "post-communication" );
+   }
+
+   this->stopTiming( "apply" );
+}
+// Runs toMatrix_macro_2D on every macro-face of the storage, passing mat and the face index data of
+// src and dst. The flag argument is ignored. Aborts with "Not implemented." for storages with global cells.
+void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                                                          const P2Function< idx_t >&                  src,
+                                                          const P1Function< idx_t >&                  dst,
+                                                          uint_t                                      level,
+                                                          DoFType                                     flag ) const
+{
+   this->startTiming( "toMatrix" );
+
+   // We currently ignore the flag provided!
+   if ( flag != All )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      this->timingTree_->start( "pre-communication" );
+      this->timingTree_->stop( "pre-communication" );
+
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      this->timingTree_->start( "pre-communication" );
+      // Nothing is communicated here; the timer section is kept empty.
+      this->timingTree_->stop( "pre-communication" );
+
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data
+         idx_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t* _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t* _data_dst       = face.getData( dst.getFaceDataID() )->getPointer( level );
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         // Resolve the AnnulusMap once per face; the checked pointer is reused for all accessors below.
+         const auto annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+         WALBERLA_CHECK_NOT_NULLPTR(
+             annulusMap, "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-face." )
+         const real_t radRefVertex = annulusMap->radRefVertex();
+         const real_t radRayVertex = annulusMap->radRayVertex();
+         const real_t refVertex_0  = annulusMap->refVertex()[0];
+         const real_t rayVertex_0  = annulusMap->rayVertex()[0];
+         const real_t thrVertex_0  = annulusMap->thrVertex()[0];
+         const real_t refVertex_1  = annulusMap->refVertex()[1];
+         const real_t rayVertex_1  = annulusMap->rayVertex()[1];
+         const real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+         toMatrix_macro_2D(
+             _data_dst,
+             _data_srcEdge,
+             _data_srcVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             mat,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+   }
+   this->stopTiming( "toMatrix" );
+}
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.hpp b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..2abd5fce429dd9671fdb4dc7572af4698681607a
--- /dev/null
+++ b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.hpp
@@ -0,0 +1,135 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+#include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/communication/Syncing.hpp"
+#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
+#include "hyteg/geometry/AnnulusMap.hpp"
+#include "hyteg/operators/Operator.hpp"
+#include "hyteg/p1functionspace/P1Function.hpp"
+#include "hyteg/p2functionspace/P2Function.hpp"
+#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
+#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+/// Divergence.
+///
+/// Component:    1
+/// Geometry map: AnnulusMap
+///
+/// Weak formulation
+///
+///     u: trial function (vectorial space: Lagrange, degree: 2)
+///     v: test function  (scalar space:    Lagrange, degree: 1)
+///
+///     ∫ - ( ∇ · u ) v
+
+class P2ToP1ElementwiseDivergenceAnnulusMap_0_1 : public Operator< P2Function< real_t >, P1Function< real_t > >
+{
+ public:
+   P2ToP1ElementwiseDivergenceAnnulusMap_0_1( const std::shared_ptr< PrimitiveStorage >& storage,
+                                              size_t                                     minLevel,
+                                              size_t                                     maxLevel ); // all three are forwarded to Operator
+   /// Runs apply_macro_2D on every macro-face; with updateType == Replace the dst entries selected by flag are zeroed first.
+   void apply( const P2Function< real_t >& src,
+               const P1Function< real_t >& dst,
+               uint_t                      level,
+               DoFType                     flag,
+               UpdateType                  updateType = Replace ) const;
+   /// Runs toMatrix_macro_2D on every macro-face with mat; flag is ignored (a warning is logged if it is not All).
+   void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                  const P2Function< idx_t >&                  src,
+                  const P1Function< idx_t >&                  dst,
+                  uint_t                                      level,
+                  DoFType                                     flag ) const;
+
+ protected:
+ private:
+   /// Kernel type: apply (called once per macro-face by apply(); only a 2D variant is declared)
+   /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    220     318      17      12      3              0                 0              1
+   void apply_macro_2D( real_t* RESTRICT _data_dst,
+                        real_t* RESTRICT _data_srcEdge,
+                        real_t* RESTRICT _data_srcVertex,
+                        real_t           macro_vertex_coord_id_0comp0,
+                        real_t           macro_vertex_coord_id_0comp1,
+                        real_t           macro_vertex_coord_id_1comp0,
+                        real_t           macro_vertex_coord_id_1comp1,
+                        real_t           macro_vertex_coord_id_2comp0,
+                        real_t           macro_vertex_coord_id_2comp1,
+                        int64_t          micro_edges_per_macro_edge,
+                        real_t           micro_edges_per_macro_edge_float,
+                        real_t           radRayVertex,
+                        real_t           radRefVertex,
+                        real_t           rayVertex_0,
+                        real_t           rayVertex_1,
+                        real_t           refVertex_0,
+                        real_t           refVertex_1,
+                        real_t           thrVertex_0,
+                        real_t           thrVertex_1 ) const;
+   /// Kernel type: toMatrix (called once per macro-face by toMatrix(); only a 2D variant is declared)
+   /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    202     300      17      12      3              0                 0              4
+   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dst,
+                           idx_t* RESTRICT                      _data_srcEdge,
+                           idx_t* RESTRICT                      _data_srcVertex,
+                           real_t                               macro_vertex_coord_id_0comp0,
+                           real_t                               macro_vertex_coord_id_0comp1,
+                           real_t                               macro_vertex_coord_id_1comp0,
+                           real_t                               macro_vertex_coord_id_1comp1,
+                           real_t                               macro_vertex_coord_id_2comp0,
+                           real_t                               macro_vertex_coord_id_2comp1,
+                           std::shared_ptr< SparseMatrixProxy > mat,
+                           int64_t                              micro_edges_per_macro_edge,
+                           real_t                               micro_edges_per_macro_edge_float,
+                           real_t                               radRayVertex,
+                           real_t                               radRefVertex,
+                           real_t                               rayVertex_0,
+                           real_t                               rayVertex_1,
+                           real_t                               refVertex_0,
+                           real_t                               refVertex_1,
+                           real_t                               thrVertex_0,
+                           real_t                               thrVertex_1 ) const;
+};
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..408757f3f9a1aaebef98fd713d12164e596c5523
--- /dev/null
+++ b/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp
@@ -0,0 +1,834 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ToP1ElementwiseDivergenceAnnulusMap_0_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_kernel_op_0 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_kernel_op_12 = rayVertex_0 - refVertex_0;
+       const real_t tmp_kernel_op_13 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_kernel_op_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_kernel_op_15 = radRayVertex - radRefVertex;
+       const real_t tmp_kernel_op_16 = tmp_kernel_op_15*1.0 / (tmp_kernel_op_0*tmp_kernel_op_12 - tmp_kernel_op_13*tmp_kernel_op_14);
+       const real_t tmp_kernel_op_17 = tmp_kernel_op_16*1.0;
+       const real_t tmp_kernel_op_28 = -tmp_kernel_op_13;
+       const real_t tmp_kernel_op_38 = -tmp_kernel_op_0;
+       const real_t tmp_kernel_op_39 = -tmp_kernel_op_15*1.0 / (-tmp_kernel_op_12*tmp_kernel_op_38 + tmp_kernel_op_14*tmp_kernel_op_28);
+       const real_t tmp_kernel_op_40 = tmp_kernel_op_39*1.0;
+       const real_t tmp_kernel_op_44 = -rayVertex_1;
+       const real_t tmp_kernel_op_45 = -rayVertex_0;
+       const real_t tmp_kernel_op_55 = 0.66666666666666663;
+       const real_t tmp_kernel_op_89 = 2.6666666666666665;
+       const real_t tmp_kernel_op_123 = 0.66666666666666663;
+       const real_t tmp_kernel_op_126 = 2.6666666666666665;
+       const real_t tmp_kernel_op_129 = 0.66666666666666663;
+       const real_t tmp_kernel_op_132 = 0.66666666666666663;
+       const real_t tmp_kernel_op_149 = tmp_kernel_op_126 + tmp_kernel_op_55 - 3.0;
+       const real_t tmp_kernel_op_151 = tmp_kernel_op_129 + tmp_kernel_op_89 - 3.0;
+       const real_t tmp_kernel_op_153 = tmp_kernel_op_123 + tmp_kernel_op_132 - 3.0;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d tmp_kernel_op_1 = _mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_1);
+                const __m256d tmp_kernel_op_5 = _mm256_mul_pd(tmp_kernel_op_4,tmp_kernel_op_4);
+                const __m256d tmp_kernel_op_6 = _mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_7 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_8 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_9 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_6);
+                const __m256d tmp_kernel_op_10 = _mm256_mul_pd(tmp_kernel_op_9,tmp_kernel_op_9);
+                const __m256d tmp_kernel_op_11 = _mm256_add_pd(tmp_kernel_op_10,tmp_kernel_op_5);
+                const __m256d tmp_kernel_op_18 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_11)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17));
+                const __m256d tmp_kernel_op_19 = _mm256_mul_pd(tmp_kernel_op_18,tmp_kernel_op_4);
+                const __m256d tmp_kernel_op_20 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_9),_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_4),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_21 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_11),_mm256_mul_pd(tmp_kernel_op_11,tmp_kernel_op_11)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_22 = _mm256_mul_pd(tmp_kernel_op_21,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_20,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_23 = _mm256_mul_pd(tmp_kernel_op_18,tmp_kernel_op_9);
+                const __m256d tmp_kernel_op_24 = _mm256_mul_pd(tmp_kernel_op_21,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_20,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_25 = _mm256_mul_pd(tmp_kernel_op_4,tmp_kernel_op_9);
+                const __m256d tmp_kernel_op_26 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_23,_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(tmp_kernel_op_24,tmp_kernel_op_5)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_19,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_10,tmp_kernel_op_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_19,_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_24,tmp_kernel_op_25),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_23,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_22,tmp_kernel_op_25))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_27 = _mm256_mul_pd(tmp_kernel_op_26,_mm256_set_pd(0.16666666666666674,0.16666666666666674,0.16666666666666674,0.16666666666666674));
+                const __m256d tmp_kernel_op_29 = _mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_30 = _mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_31 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_29,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_30,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_1);
+                const __m256d tmp_kernel_op_32 = _mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_33 = _mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_34 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_32,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_33,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_0);
+                const __m256d tmp_kernel_op_35 = _mm256_mul_pd(tmp_kernel_op_34,tmp_kernel_op_34);
+                const __m256d tmp_kernel_op_36 = _mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_31);
+                const __m256d tmp_kernel_op_37 = _mm256_add_pd(tmp_kernel_op_35,tmp_kernel_op_36);
+                const __m256d tmp_kernel_op_41 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_37)),_mm256_set_pd(tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40));
+                const __m256d tmp_kernel_op_42 = _mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_41);
+                const __m256d tmp_kernel_op_43 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_37),_mm256_mul_pd(tmp_kernel_op_37,tmp_kernel_op_37));
+                const __m256d tmp_kernel_op_46 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45),tmp_kernel_op_34),_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44),tmp_kernel_op_31),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28))),_mm256_set_pd(tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_47 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_35,tmp_kernel_op_43),tmp_kernel_op_46),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_48 = _mm256_mul_pd(tmp_kernel_op_34,tmp_kernel_op_41);
+                const __m256d tmp_kernel_op_49 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_43,tmp_kernel_op_46),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_50 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_42,_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_34),tmp_kernel_op_49),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_51 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_47,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_48,_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(tmp_kernel_op_36,tmp_kernel_op_49))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_50,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_48,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_34),tmp_kernel_op_43),tmp_kernel_op_46),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_52 = _mm256_mul_pd(tmp_kernel_op_47,tmp_kernel_op_51);
+                const __m256d tmp_kernel_op_53 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_50,tmp_kernel_op_51),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_54 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_52,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_53,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)));
+                const __m256d tmp_kernel_op_56 = _mm256_mul_pd(tmp_kernel_op_54,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55)));
+                const __m256d tmp_kernel_op_57 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_1);
+                const __m256d tmp_kernel_op_58 = _mm256_mul_pd(tmp_kernel_op_57,tmp_kernel_op_57);
+                const __m256d tmp_kernel_op_59 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_6);
+                const __m256d tmp_kernel_op_60 = _mm256_mul_pd(tmp_kernel_op_59,tmp_kernel_op_59);
+                const __m256d tmp_kernel_op_61 = _mm256_add_pd(tmp_kernel_op_58,tmp_kernel_op_60);
+                const __m256d tmp_kernel_op_62 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_61)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17));
+                const __m256d tmp_kernel_op_63 = _mm256_mul_pd(tmp_kernel_op_57,tmp_kernel_op_62);
+                const __m256d tmp_kernel_op_64 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_59),_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_57),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_65 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_61),_mm256_mul_pd(tmp_kernel_op_61,tmp_kernel_op_61)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_66 = _mm256_mul_pd(tmp_kernel_op_65,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_64,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_67 = _mm256_mul_pd(tmp_kernel_op_59,tmp_kernel_op_62);
+                const __m256d tmp_kernel_op_68 = _mm256_mul_pd(tmp_kernel_op_65,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_64,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_69 = _mm256_mul_pd(tmp_kernel_op_57,tmp_kernel_op_59);
+                const __m256d tmp_kernel_op_70 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_67,_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(tmp_kernel_op_58,tmp_kernel_op_68)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_63,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_60,tmp_kernel_op_66),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_63,_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_68,tmp_kernel_op_69),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_67,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_66,tmp_kernel_op_69))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_71 = _mm256_mul_pd(tmp_kernel_op_70,_mm256_set_pd(0.16666666666666671,0.16666666666666671,0.16666666666666671,0.16666666666666671));
+                const __m256d tmp_kernel_op_72 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_30,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_29,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_1);
+                const __m256d tmp_kernel_op_73 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_33,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_32,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_0);
+                const __m256d tmp_kernel_op_74 = _mm256_mul_pd(tmp_kernel_op_73,tmp_kernel_op_73);
+                const __m256d tmp_kernel_op_75 = _mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_72);
+                const __m256d tmp_kernel_op_76 = _mm256_add_pd(tmp_kernel_op_74,tmp_kernel_op_75);
+                const __m256d tmp_kernel_op_77 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_76)),_mm256_set_pd(tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40));
+                const __m256d tmp_kernel_op_78 = _mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_77);
+                const __m256d tmp_kernel_op_79 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_76),_mm256_mul_pd(tmp_kernel_op_76,tmp_kernel_op_76));
+                const __m256d tmp_kernel_op_80 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45),tmp_kernel_op_73),_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44),tmp_kernel_op_72),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28))),_mm256_set_pd(tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_81 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_78,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_74,tmp_kernel_op_79),tmp_kernel_op_80),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_82 = _mm256_mul_pd(tmp_kernel_op_73,tmp_kernel_op_77);
+                const __m256d tmp_kernel_op_83 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_79,tmp_kernel_op_80),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_84 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_78,_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_73),tmp_kernel_op_83),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_85 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_81,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_82,_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(tmp_kernel_op_75,tmp_kernel_op_83))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_84,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_82,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_73),tmp_kernel_op_79),tmp_kernel_op_80),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_86 = _mm256_mul_pd(tmp_kernel_op_81,tmp_kernel_op_85);
+                const __m256d tmp_kernel_op_87 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_84,tmp_kernel_op_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_88 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_86,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_87,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)));
+                const __m256d tmp_kernel_op_90 = _mm256_mul_pd(tmp_kernel_op_88,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89)));
+                const __m256d tmp_kernel_op_91 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),tmp_kernel_op_1);
+                const __m256d tmp_kernel_op_92 = _mm256_mul_pd(tmp_kernel_op_91,tmp_kernel_op_91);
+                const __m256d tmp_kernel_op_93 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),tmp_kernel_op_6);
+                const __m256d tmp_kernel_op_94 = _mm256_mul_pd(tmp_kernel_op_93,tmp_kernel_op_93);
+                const __m256d tmp_kernel_op_95 = _mm256_add_pd(tmp_kernel_op_92,tmp_kernel_op_94);
+                const __m256d tmp_kernel_op_96 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_95)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17));
+                const __m256d tmp_kernel_op_97 = _mm256_mul_pd(tmp_kernel_op_91,tmp_kernel_op_96);
+                const __m256d tmp_kernel_op_98 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_93),_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_99 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_95),_mm256_mul_pd(tmp_kernel_op_95,tmp_kernel_op_95)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_100 = _mm256_mul_pd(tmp_kernel_op_99,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_98,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_101 = _mm256_mul_pd(tmp_kernel_op_93,tmp_kernel_op_96);
+                const __m256d tmp_kernel_op_102 = _mm256_mul_pd(tmp_kernel_op_99,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_98,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_103 = _mm256_mul_pd(tmp_kernel_op_91,tmp_kernel_op_93);
+                const __m256d tmp_kernel_op_104 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_101,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_103)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_102,tmp_kernel_op_103),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_97,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_101,_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(tmp_kernel_op_102,tmp_kernel_op_92)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_97,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_94),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))))));
+                const __m256d tmp_kernel_op_105 = _mm256_mul_pd(tmp_kernel_op_104,_mm256_set_pd(0.66666666666666674,0.66666666666666674,0.66666666666666674,0.66666666666666674));
+                const __m256d tmp_kernel_op_106 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_29,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_30,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),p_affine_0_1);
+                const __m256d tmp_kernel_op_107 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_32,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_33,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),p_affine_0_0);
+                const __m256d tmp_kernel_op_108 = _mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_107);
+                const __m256d tmp_kernel_op_109 = _mm256_mul_pd(tmp_kernel_op_106,tmp_kernel_op_106);
+                const __m256d tmp_kernel_op_110 = _mm256_add_pd(tmp_kernel_op_108,tmp_kernel_op_109);
+                const __m256d tmp_kernel_op_111 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_110)),_mm256_set_pd(tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40));
+                const __m256d tmp_kernel_op_112 = _mm256_mul_pd(tmp_kernel_op_106,tmp_kernel_op_111);
+                const __m256d tmp_kernel_op_113 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_110),_mm256_mul_pd(tmp_kernel_op_110,tmp_kernel_op_110));
+                const __m256d tmp_kernel_op_114 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45),tmp_kernel_op_107),_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44),tmp_kernel_op_106),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28))),_mm256_set_pd(tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_115 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_112,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_113),tmp_kernel_op_114),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_116 = _mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_111);
+                const __m256d tmp_kernel_op_117 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_113,tmp_kernel_op_114),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_118 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_112,_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_106,tmp_kernel_op_107),tmp_kernel_op_117),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_119 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_115,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_116,_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(tmp_kernel_op_109,tmp_kernel_op_117))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_118,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_116,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_106,tmp_kernel_op_107),tmp_kernel_op_113),tmp_kernel_op_114),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_120 = _mm256_mul_pd(tmp_kernel_op_115,tmp_kernel_op_119);
+                const __m256d tmp_kernel_op_121 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_118,tmp_kernel_op_119),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_122 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_120,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_121,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)));
+                const __m256d tmp_kernel_op_124 = _mm256_mul_pd(tmp_kernel_op_122,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123)));
+                const __m256d tmp_kernel_op_125 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_52,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_53,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                const __m256d tmp_kernel_op_127 = _mm256_mul_pd(tmp_kernel_op_125,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126)));
+                const __m256d tmp_kernel_op_128 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_86,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_87,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                const __m256d tmp_kernel_op_130 = _mm256_mul_pd(tmp_kernel_op_128,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129)));
+                const __m256d tmp_kernel_op_131 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_120,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_121,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                const __m256d tmp_kernel_op_133 = _mm256_mul_pd(tmp_kernel_op_131,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132)));
+                const __m256d tmp_kernel_op_134 = _mm256_mul_pd(tmp_kernel_op_125,_mm256_set_pd(tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55));
+                const __m256d tmp_kernel_op_135 = _mm256_mul_pd(tmp_kernel_op_54,_mm256_set_pd(tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126));
+                const __m256d tmp_kernel_op_136 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_134,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_135,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_137 = _mm256_mul_pd(tmp_kernel_op_128,_mm256_set_pd(tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89));
+                const __m256d tmp_kernel_op_138 = _mm256_mul_pd(tmp_kernel_op_88,_mm256_set_pd(tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129));
+                const __m256d tmp_kernel_op_139 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_137,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_138,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_140 = _mm256_mul_pd(tmp_kernel_op_131,_mm256_set_pd(tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123));
+                const __m256d tmp_kernel_op_141 = _mm256_mul_pd(tmp_kernel_op_122,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132));
+                const __m256d tmp_kernel_op_142 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_140,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_141,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_143 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_125,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55)),_mm256_set_pd(-1.333333333333333,-1.333333333333333,-1.333333333333333,-1.333333333333333))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_135);
+                const __m256d tmp_kernel_op_144 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_128,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_138);
+                const __m256d tmp_kernel_op_145 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_131,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_141);
+                const __m256d tmp_kernel_op_146 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_54,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_134);
+                const __m256d tmp_kernel_op_147 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_88,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129)),_mm256_set_pd(-1.333333333333333,-1.333333333333333,-1.333333333333333,-1.333333333333333))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_137);
+                const __m256d tmp_kernel_op_148 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_122,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_140);
+                const __m256d tmp_kernel_op_150 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_125,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_54,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149)));
+                const __m256d tmp_kernel_op_152 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_128,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_151,tmp_kernel_op_151,tmp_kernel_op_151,tmp_kernel_op_151)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_88,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_151,tmp_kernel_op_151,tmp_kernel_op_151,tmp_kernel_op_151)));
+                const __m256d tmp_kernel_op_154 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_153,tmp_kernel_op_153,tmp_kernel_op_153,tmp_kernel_op_153)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_131,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_153,tmp_kernel_op_153,tmp_kernel_op_153,tmp_kernel_op_153)));
+                const __m256d tmp_kernel_op_155 = _mm256_mul_pd(tmp_kernel_op_26,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666));
+                const __m256d tmp_kernel_op_156 = _mm256_mul_pd(tmp_kernel_op_70,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663));
+                const __m256d tmp_kernel_op_157 = _mm256_mul_pd(tmp_kernel_op_104,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666));
+                const __m256d tmp_kernel_op_158 = _mm256_mul_pd(tmp_kernel_op_26,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663));
+                const __m256d tmp_kernel_op_159 = _mm256_mul_pd(tmp_kernel_op_70,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666));
+                const __m256d tmp_kernel_op_160 = _mm256_mul_pd(tmp_kernel_op_104,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666));
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_3,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_142),_mm256_mul_pd(tmp_kernel_op_136,tmp_kernel_op_27)),_mm256_mul_pd(tmp_kernel_op_139,tmp_kernel_op_71))),_mm256_mul_pd(src_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_145),_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_27)),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_71)))),_mm256_mul_pd(src_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_148),_mm256_mul_pd(tmp_kernel_op_146,tmp_kernel_op_27)),_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_71)))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_154),_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_27)),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_71)))),_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_124),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_27,tmp_kernel_op_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_71,tmp_kernel_op_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_133),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_127,tmp_kernel_op_27),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_130,tmp_kernel_op_71),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_3,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_136,tmp_kernel_op_155),_mm256_mul_pd(tmp_kernel_op_139,tmp_kernel_op_156)),_mm256_mul_pd(tmp_kernel_op_142,tmp_kernel_op_157))),_mm256_mul_pd(src_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_155),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_156)),_mm256_mul_pd(tmp_kernel_op_145,tmp_kernel_op_157)))),_mm256_mul_pd(src_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_146,tmp_kernel_op_155),_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_156)),_mm256_mul_pd(tmp_kernel_op_148,tmp_kernel_op_157)))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_155),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_156)),_mm256_mul_pd(tmp_kernel_op_154,tmp_kernel_op_157)))),_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_124,tmp_kernel_op_157),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_155,tmp_kernel_op_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_156,tmp_kernel_op_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_127,tmp_kernel_op_155),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_130,tmp_kernel_op_156),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_133,tmp_kernel_op_157),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_3,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_136,tmp_kernel_op_158),_mm256_mul_pd(tmp_kernel_op_139,tmp_kernel_op_159)),_mm256_mul_pd(tmp_kernel_op_142,tmp_kernel_op_160))),_mm256_mul_pd(src_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_158),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_159)),_mm256_mul_pd(tmp_kernel_op_145,tmp_kernel_op_160)))),_mm256_mul_pd(src_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_146,tmp_kernel_op_158),_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_159)),_mm256_mul_pd(tmp_kernel_op_148,tmp_kernel_op_160)))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_158),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_159)),_mm256_mul_pd(tmp_kernel_op_154,tmp_kernel_op_160)))),_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_124,tmp_kernel_op_160),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_158,tmp_kernel_op_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_159,tmp_kernel_op_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_127,tmp_kernel_op_158),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_130,tmp_kernel_op_159),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_133,tmp_kernel_op_160),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t tmp_kernel_op_1 = -p_affine_0_0;
+                const real_t tmp_kernel_op_2 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_kernel_op_3 = p_affine_0_0 - p_affine_2_0;
+                const real_t tmp_kernel_op_4 = tmp_kernel_op_1 + tmp_kernel_op_2*0.16666666666666666 + tmp_kernel_op_3*0.66666666666666663;
+                const real_t tmp_kernel_op_5 = (tmp_kernel_op_4*tmp_kernel_op_4);
+                const real_t tmp_kernel_op_6 = -p_affine_0_1;
+                const real_t tmp_kernel_op_7 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_kernel_op_8 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_kernel_op_9 = tmp_kernel_op_6 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+                const real_t tmp_kernel_op_10 = (tmp_kernel_op_9*tmp_kernel_op_9);
+                const real_t tmp_kernel_op_11 = tmp_kernel_op_10 + tmp_kernel_op_5;
+                const real_t tmp_kernel_op_18 = pow(tmp_kernel_op_11, -0.50000000000000000)*tmp_kernel_op_17;
+                const real_t tmp_kernel_op_19 = tmp_kernel_op_18*tmp_kernel_op_4;
+                const real_t tmp_kernel_op_20 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_4) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_9);
+                const real_t tmp_kernel_op_21 = pow(tmp_kernel_op_11, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_22 = tmp_kernel_op_21*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_20);
+                const real_t tmp_kernel_op_23 = tmp_kernel_op_18*tmp_kernel_op_9;
+                const real_t tmp_kernel_op_24 = tmp_kernel_op_21*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_20);
+                const real_t tmp_kernel_op_25 = tmp_kernel_op_4*tmp_kernel_op_9;
+                const real_t tmp_kernel_op_26 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_19 - tmp_kernel_op_10*tmp_kernel_op_22)*(tmp_kernel_op_13*tmp_kernel_op_23 + tmp_kernel_op_24*tmp_kernel_op_5) - (tmp_kernel_op_0*tmp_kernel_op_23 + tmp_kernel_op_22*tmp_kernel_op_25)*(tmp_kernel_op_13*tmp_kernel_op_19 - tmp_kernel_op_24*tmp_kernel_op_25));
+                const real_t tmp_kernel_op_27 = tmp_kernel_op_26*0.16666666666666674;
+                const real_t tmp_kernel_op_29 = -tmp_kernel_op_7;
+                const real_t tmp_kernel_op_30 = -tmp_kernel_op_8;
+                const real_t tmp_kernel_op_31 = p_affine_0_1 + tmp_kernel_op_29*0.16666666666666666 + tmp_kernel_op_30*0.66666666666666663;
+                const real_t tmp_kernel_op_32 = -tmp_kernel_op_2;
+                const real_t tmp_kernel_op_33 = -tmp_kernel_op_3;
+                const real_t tmp_kernel_op_34 = p_affine_0_0 + tmp_kernel_op_32*0.16666666666666666 + tmp_kernel_op_33*0.66666666666666663;
+                const real_t tmp_kernel_op_35 = (tmp_kernel_op_34*tmp_kernel_op_34);
+                const real_t tmp_kernel_op_36 = (tmp_kernel_op_31*tmp_kernel_op_31);
+                const real_t tmp_kernel_op_37 = tmp_kernel_op_35 + tmp_kernel_op_36;
+                const real_t tmp_kernel_op_41 = pow(tmp_kernel_op_37, -0.50000000000000000)*tmp_kernel_op_40;
+                const real_t tmp_kernel_op_42 = tmp_kernel_op_31*tmp_kernel_op_41;
+                const real_t tmp_kernel_op_43 = pow(tmp_kernel_op_37, -1.5000000000000000);
+                const real_t tmp_kernel_op_46 = radRayVertex + tmp_kernel_op_39*(-tmp_kernel_op_28*(tmp_kernel_op_31 + tmp_kernel_op_44) + tmp_kernel_op_38*(tmp_kernel_op_34 + tmp_kernel_op_45));
+                const real_t tmp_kernel_op_47 = -tmp_kernel_op_28*tmp_kernel_op_42 + tmp_kernel_op_35*tmp_kernel_op_43*tmp_kernel_op_46*1.0;
+                const real_t tmp_kernel_op_48 = tmp_kernel_op_34*tmp_kernel_op_41;
+                const real_t tmp_kernel_op_49 = tmp_kernel_op_43*tmp_kernel_op_46*1.0;
+                const real_t tmp_kernel_op_50 = -tmp_kernel_op_31*tmp_kernel_op_34*tmp_kernel_op_49 + tmp_kernel_op_38*tmp_kernel_op_42;
+                const real_t tmp_kernel_op_51 = 1.0 / (tmp_kernel_op_47*(tmp_kernel_op_36*tmp_kernel_op_49 + tmp_kernel_op_38*tmp_kernel_op_48) - tmp_kernel_op_50*(-tmp_kernel_op_28*tmp_kernel_op_48 - tmp_kernel_op_31*tmp_kernel_op_34*tmp_kernel_op_43*tmp_kernel_op_46));
+                const real_t tmp_kernel_op_52 = tmp_kernel_op_47*tmp_kernel_op_51;
+                const real_t tmp_kernel_op_53 = -tmp_kernel_op_50*tmp_kernel_op_51;
+                const real_t tmp_kernel_op_54 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_52 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_53;
+                const real_t tmp_kernel_op_56 = tmp_kernel_op_54*(tmp_kernel_op_55 - 1.0);
+                const real_t tmp_kernel_op_57 = tmp_kernel_op_1 + tmp_kernel_op_2*0.66666666666666663 + tmp_kernel_op_3*0.16666666666666666;
+                const real_t tmp_kernel_op_58 = (tmp_kernel_op_57*tmp_kernel_op_57);
+                const real_t tmp_kernel_op_59 = tmp_kernel_op_6 + tmp_kernel_op_7*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+                const real_t tmp_kernel_op_60 = (tmp_kernel_op_59*tmp_kernel_op_59);
+                const real_t tmp_kernel_op_61 = tmp_kernel_op_58 + tmp_kernel_op_60;
+                const real_t tmp_kernel_op_62 = tmp_kernel_op_17*pow(tmp_kernel_op_61, -0.50000000000000000);
+                const real_t tmp_kernel_op_63 = tmp_kernel_op_57*tmp_kernel_op_62;
+                const real_t tmp_kernel_op_64 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_57) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_59);
+                const real_t tmp_kernel_op_65 = pow(tmp_kernel_op_61, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_66 = tmp_kernel_op_65*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_64);
+                const real_t tmp_kernel_op_67 = tmp_kernel_op_59*tmp_kernel_op_62;
+                const real_t tmp_kernel_op_68 = tmp_kernel_op_65*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_64);
+                const real_t tmp_kernel_op_69 = tmp_kernel_op_57*tmp_kernel_op_59;
+                const real_t tmp_kernel_op_70 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_63 - tmp_kernel_op_60*tmp_kernel_op_66)*(tmp_kernel_op_13*tmp_kernel_op_67 + tmp_kernel_op_58*tmp_kernel_op_68) - (tmp_kernel_op_0*tmp_kernel_op_67 + tmp_kernel_op_66*tmp_kernel_op_69)*(tmp_kernel_op_13*tmp_kernel_op_63 - tmp_kernel_op_68*tmp_kernel_op_69));
+                const real_t tmp_kernel_op_71 = tmp_kernel_op_70*0.16666666666666671;
+                const real_t tmp_kernel_op_72 = p_affine_0_1 + tmp_kernel_op_29*0.66666666666666663 + tmp_kernel_op_30*0.16666666666666666;
+                const real_t tmp_kernel_op_73 = p_affine_0_0 + tmp_kernel_op_32*0.66666666666666663 + tmp_kernel_op_33*0.16666666666666666;
+                const real_t tmp_kernel_op_74 = (tmp_kernel_op_73*tmp_kernel_op_73);
+                const real_t tmp_kernel_op_75 = (tmp_kernel_op_72*tmp_kernel_op_72);
+                const real_t tmp_kernel_op_76 = tmp_kernel_op_74 + tmp_kernel_op_75;
+                const real_t tmp_kernel_op_77 = tmp_kernel_op_40*pow(tmp_kernel_op_76, -0.50000000000000000);
+                const real_t tmp_kernel_op_78 = tmp_kernel_op_72*tmp_kernel_op_77;
+                const real_t tmp_kernel_op_79 = pow(tmp_kernel_op_76, -1.5000000000000000);
+                const real_t tmp_kernel_op_80 = radRayVertex + tmp_kernel_op_39*(-tmp_kernel_op_28*(tmp_kernel_op_44 + tmp_kernel_op_72) + tmp_kernel_op_38*(tmp_kernel_op_45 + tmp_kernel_op_73));
+                const real_t tmp_kernel_op_81 = -tmp_kernel_op_28*tmp_kernel_op_78 + tmp_kernel_op_74*tmp_kernel_op_79*tmp_kernel_op_80*1.0;
+                const real_t tmp_kernel_op_82 = tmp_kernel_op_73*tmp_kernel_op_77;
+                const real_t tmp_kernel_op_83 = tmp_kernel_op_79*tmp_kernel_op_80*1.0;
+                const real_t tmp_kernel_op_84 = tmp_kernel_op_38*tmp_kernel_op_78 - tmp_kernel_op_72*tmp_kernel_op_73*tmp_kernel_op_83;
+                const real_t tmp_kernel_op_85 = 1.0 / (tmp_kernel_op_81*(tmp_kernel_op_38*tmp_kernel_op_82 + tmp_kernel_op_75*tmp_kernel_op_83) - tmp_kernel_op_84*(-tmp_kernel_op_28*tmp_kernel_op_82 - tmp_kernel_op_72*tmp_kernel_op_73*tmp_kernel_op_79*tmp_kernel_op_80));
+                const real_t tmp_kernel_op_86 = tmp_kernel_op_81*tmp_kernel_op_85;
+                const real_t tmp_kernel_op_87 = -tmp_kernel_op_84*tmp_kernel_op_85;
+                const real_t tmp_kernel_op_88 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_86 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_87;
+                const real_t tmp_kernel_op_90 = tmp_kernel_op_88*(tmp_kernel_op_89 - 1.0);
+                const real_t tmp_kernel_op_91 = tmp_kernel_op_1 + tmp_kernel_op_2*0.16666666666666666 + tmp_kernel_op_3*0.16666666666666666;
+                const real_t tmp_kernel_op_92 = (tmp_kernel_op_91*tmp_kernel_op_91);
+                const real_t tmp_kernel_op_93 = tmp_kernel_op_6 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+                const real_t tmp_kernel_op_94 = (tmp_kernel_op_93*tmp_kernel_op_93);
+                const real_t tmp_kernel_op_95 = tmp_kernel_op_92 + tmp_kernel_op_94;
+                const real_t tmp_kernel_op_96 = tmp_kernel_op_17*pow(tmp_kernel_op_95, -0.50000000000000000);
+                const real_t tmp_kernel_op_97 = tmp_kernel_op_91*tmp_kernel_op_96;
+                const real_t tmp_kernel_op_98 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_91) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_93);
+                const real_t tmp_kernel_op_99 = pow(tmp_kernel_op_95, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_100 = tmp_kernel_op_99*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_98);
+                const real_t tmp_kernel_op_101 = tmp_kernel_op_93*tmp_kernel_op_96;
+                const real_t tmp_kernel_op_102 = tmp_kernel_op_99*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_98);
+                const real_t tmp_kernel_op_103 = tmp_kernel_op_91*tmp_kernel_op_93;
+                const real_t tmp_kernel_op_104 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_101 + tmp_kernel_op_100*tmp_kernel_op_103)*(tmp_kernel_op_102*tmp_kernel_op_103 - tmp_kernel_op_13*tmp_kernel_op_97) + (tmp_kernel_op_0*tmp_kernel_op_97 - tmp_kernel_op_100*tmp_kernel_op_94)*(tmp_kernel_op_101*tmp_kernel_op_13 + tmp_kernel_op_102*tmp_kernel_op_92));
+                const real_t tmp_kernel_op_105 = tmp_kernel_op_104*0.66666666666666674;
+                const real_t tmp_kernel_op_106 = p_affine_0_1 + tmp_kernel_op_29*0.16666666666666666 + tmp_kernel_op_30*0.16666666666666666;
+                const real_t tmp_kernel_op_107 = p_affine_0_0 + tmp_kernel_op_32*0.16666666666666666 + tmp_kernel_op_33*0.16666666666666666;
+                const real_t tmp_kernel_op_108 = (tmp_kernel_op_107*tmp_kernel_op_107);
+                const real_t tmp_kernel_op_109 = (tmp_kernel_op_106*tmp_kernel_op_106);
+                const real_t tmp_kernel_op_110 = tmp_kernel_op_108 + tmp_kernel_op_109;
+                const real_t tmp_kernel_op_111 = pow(tmp_kernel_op_110, -0.50000000000000000)*tmp_kernel_op_40;
+                const real_t tmp_kernel_op_112 = tmp_kernel_op_106*tmp_kernel_op_111;
+                const real_t tmp_kernel_op_113 = pow(tmp_kernel_op_110, -1.5000000000000000);
+                const real_t tmp_kernel_op_114 = radRayVertex + tmp_kernel_op_39*(-tmp_kernel_op_28*(tmp_kernel_op_106 + tmp_kernel_op_44) + tmp_kernel_op_38*(tmp_kernel_op_107 + tmp_kernel_op_45));
+                const real_t tmp_kernel_op_115 = tmp_kernel_op_108*tmp_kernel_op_113*tmp_kernel_op_114*1.0 - tmp_kernel_op_112*tmp_kernel_op_28;
+                const real_t tmp_kernel_op_116 = tmp_kernel_op_107*tmp_kernel_op_111;
+                const real_t tmp_kernel_op_117 = tmp_kernel_op_113*tmp_kernel_op_114*1.0;
+                const real_t tmp_kernel_op_118 = -tmp_kernel_op_106*tmp_kernel_op_107*tmp_kernel_op_117 + tmp_kernel_op_112*tmp_kernel_op_38;
+                const real_t tmp_kernel_op_119 = 1.0 / (tmp_kernel_op_115*(tmp_kernel_op_109*tmp_kernel_op_117 + tmp_kernel_op_116*tmp_kernel_op_38) - tmp_kernel_op_118*(-tmp_kernel_op_106*tmp_kernel_op_107*tmp_kernel_op_113*tmp_kernel_op_114 - tmp_kernel_op_116*tmp_kernel_op_28));
+                const real_t tmp_kernel_op_120 = tmp_kernel_op_115*tmp_kernel_op_119;
+                const real_t tmp_kernel_op_121 = -tmp_kernel_op_118*tmp_kernel_op_119;
+                const real_t tmp_kernel_op_122 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_120 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_121;
+                const real_t tmp_kernel_op_124 = tmp_kernel_op_122*(tmp_kernel_op_123 - 1.0);
+                const real_t tmp_kernel_op_125 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_52 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_53;
+                const real_t tmp_kernel_op_127 = tmp_kernel_op_125*(tmp_kernel_op_126 - 1.0);
+                const real_t tmp_kernel_op_128 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_86 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_87;
+                const real_t tmp_kernel_op_130 = tmp_kernel_op_128*(tmp_kernel_op_129 - 1.0);
+                const real_t tmp_kernel_op_131 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_120 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_121;
+                const real_t tmp_kernel_op_133 = tmp_kernel_op_131*(tmp_kernel_op_132 - 1.0);
+                const real_t tmp_kernel_op_134 = tmp_kernel_op_125*tmp_kernel_op_55;
+                const real_t tmp_kernel_op_135 = tmp_kernel_op_126*tmp_kernel_op_54;
+                const real_t tmp_kernel_op_136 = -tmp_kernel_op_134 - tmp_kernel_op_135;
+                const real_t tmp_kernel_op_137 = tmp_kernel_op_128*tmp_kernel_op_89;
+                const real_t tmp_kernel_op_138 = tmp_kernel_op_129*tmp_kernel_op_88;
+                const real_t tmp_kernel_op_139 = -tmp_kernel_op_137 - tmp_kernel_op_138;
+                const real_t tmp_kernel_op_140 = tmp_kernel_op_123*tmp_kernel_op_131;
+                const real_t tmp_kernel_op_141 = tmp_kernel_op_122*tmp_kernel_op_132;
+                const real_t tmp_kernel_op_142 = -tmp_kernel_op_140 - tmp_kernel_op_141;
+                const real_t tmp_kernel_op_143 = -tmp_kernel_op_125*(-tmp_kernel_op_55 - 1.333333333333333) + tmp_kernel_op_135;
+                const real_t tmp_kernel_op_144 = -tmp_kernel_op_128*(-tmp_kernel_op_89 + 2.666666666666667) + tmp_kernel_op_138;
+                const real_t tmp_kernel_op_145 = -tmp_kernel_op_131*(-tmp_kernel_op_123 + 2.666666666666667) + tmp_kernel_op_141;
+                const real_t tmp_kernel_op_146 = tmp_kernel_op_134 - tmp_kernel_op_54*(-tmp_kernel_op_126 + 2.666666666666667);
+                const real_t tmp_kernel_op_147 = tmp_kernel_op_137 - tmp_kernel_op_88*(-tmp_kernel_op_129 - 1.333333333333333);
+                const real_t tmp_kernel_op_148 = -tmp_kernel_op_122*(-tmp_kernel_op_132 + 2.666666666666667) + tmp_kernel_op_140;
+                const real_t tmp_kernel_op_150 = -tmp_kernel_op_125*tmp_kernel_op_149 - tmp_kernel_op_149*tmp_kernel_op_54;
+                const real_t tmp_kernel_op_152 = -tmp_kernel_op_128*tmp_kernel_op_151 - tmp_kernel_op_151*tmp_kernel_op_88;
+                const real_t tmp_kernel_op_154 = -tmp_kernel_op_122*tmp_kernel_op_153 - tmp_kernel_op_131*tmp_kernel_op_153;
+                const real_t tmp_kernel_op_155 = tmp_kernel_op_26*0.16666666666666666;
+                const real_t tmp_kernel_op_156 = tmp_kernel_op_70*0.66666666666666663;
+                const real_t tmp_kernel_op_157 = tmp_kernel_op_104*0.16666666666666666;
+                const real_t tmp_kernel_op_158 = tmp_kernel_op_26*0.66666666666666663;
+                const real_t tmp_kernel_op_159 = tmp_kernel_op_70*0.16666666666666666;
+                const real_t tmp_kernel_op_160 = tmp_kernel_op_104*0.16666666666666666;
+                const real_t elMatVec_0 = src_dof_0*(tmp_kernel_op_105*tmp_kernel_op_154 + tmp_kernel_op_150*tmp_kernel_op_27 + tmp_kernel_op_152*tmp_kernel_op_71) + src_dof_1*(-tmp_kernel_op_105*tmp_kernel_op_124 - tmp_kernel_op_27*tmp_kernel_op_56 - tmp_kernel_op_71*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_105*tmp_kernel_op_133 - tmp_kernel_op_127*tmp_kernel_op_27 - tmp_kernel_op_130*tmp_kernel_op_71) + src_dof_3*(tmp_kernel_op_105*tmp_kernel_op_142 + tmp_kernel_op_136*tmp_kernel_op_27 + tmp_kernel_op_139*tmp_kernel_op_71) + src_dof_4*(tmp_kernel_op_105*tmp_kernel_op_145 + tmp_kernel_op_143*tmp_kernel_op_27 + tmp_kernel_op_144*tmp_kernel_op_71) + src_dof_5*(tmp_kernel_op_105*tmp_kernel_op_148 + tmp_kernel_op_146*tmp_kernel_op_27 + tmp_kernel_op_147*tmp_kernel_op_71);
+                const real_t elMatVec_1 = src_dof_0*(tmp_kernel_op_150*tmp_kernel_op_155 + tmp_kernel_op_152*tmp_kernel_op_156 + tmp_kernel_op_154*tmp_kernel_op_157) + src_dof_1*(-tmp_kernel_op_124*tmp_kernel_op_157 - tmp_kernel_op_155*tmp_kernel_op_56 - tmp_kernel_op_156*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_127*tmp_kernel_op_155 - tmp_kernel_op_130*tmp_kernel_op_156 - tmp_kernel_op_133*tmp_kernel_op_157) + src_dof_3*(tmp_kernel_op_136*tmp_kernel_op_155 + tmp_kernel_op_139*tmp_kernel_op_156 + tmp_kernel_op_142*tmp_kernel_op_157) + src_dof_4*(tmp_kernel_op_143*tmp_kernel_op_155 + tmp_kernel_op_144*tmp_kernel_op_156 + tmp_kernel_op_145*tmp_kernel_op_157) + src_dof_5*(tmp_kernel_op_146*tmp_kernel_op_155 + tmp_kernel_op_147*tmp_kernel_op_156 + tmp_kernel_op_148*tmp_kernel_op_157);
+                const real_t elMatVec_2 = src_dof_0*(tmp_kernel_op_150*tmp_kernel_op_158 + tmp_kernel_op_152*tmp_kernel_op_159 + tmp_kernel_op_154*tmp_kernel_op_160) + src_dof_1*(-tmp_kernel_op_124*tmp_kernel_op_160 - tmp_kernel_op_158*tmp_kernel_op_56 - tmp_kernel_op_159*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_127*tmp_kernel_op_158 - tmp_kernel_op_130*tmp_kernel_op_159 - tmp_kernel_op_133*tmp_kernel_op_160) + src_dof_3*(tmp_kernel_op_136*tmp_kernel_op_158 + tmp_kernel_op_139*tmp_kernel_op_159 + tmp_kernel_op_142*tmp_kernel_op_160) + src_dof_4*(tmp_kernel_op_143*tmp_kernel_op_158 + tmp_kernel_op_144*tmp_kernel_op_159 + tmp_kernel_op_145*tmp_kernel_op_160) + src_dof_5*(tmp_kernel_op_146*tmp_kernel_op_158 + tmp_kernel_op_147*tmp_kernel_op_159 + tmp_kernel_op_148*tmp_kernel_op_160);
+                _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d tmp_kernel_op_1 = _mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_1);
+                const __m256d tmp_kernel_op_5 = _mm256_mul_pd(tmp_kernel_op_4,tmp_kernel_op_4);
+                const __m256d tmp_kernel_op_6 = _mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_7 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_8 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_9 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_6);
+                const __m256d tmp_kernel_op_10 = _mm256_mul_pd(tmp_kernel_op_9,tmp_kernel_op_9);
+                const __m256d tmp_kernel_op_11 = _mm256_add_pd(tmp_kernel_op_10,tmp_kernel_op_5);
+                const __m256d tmp_kernel_op_18 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_11)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17));
+                const __m256d tmp_kernel_op_19 = _mm256_mul_pd(tmp_kernel_op_18,tmp_kernel_op_4);
+                const __m256d tmp_kernel_op_20 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_9),_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_4),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_21 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_11),_mm256_mul_pd(tmp_kernel_op_11,tmp_kernel_op_11)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_22 = _mm256_mul_pd(tmp_kernel_op_21,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_20,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_23 = _mm256_mul_pd(tmp_kernel_op_18,tmp_kernel_op_9);
+                const __m256d tmp_kernel_op_24 = _mm256_mul_pd(tmp_kernel_op_21,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_20,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_25 = _mm256_mul_pd(tmp_kernel_op_4,tmp_kernel_op_9);
+                const __m256d tmp_kernel_op_26 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_23,_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(tmp_kernel_op_24,tmp_kernel_op_5)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_19,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_10,tmp_kernel_op_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_19,_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_24,tmp_kernel_op_25),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_23,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_22,tmp_kernel_op_25))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_27 = _mm256_mul_pd(tmp_kernel_op_26,_mm256_set_pd(0.16666666666666674,0.16666666666666674,0.16666666666666674,0.16666666666666674));
+                const __m256d tmp_kernel_op_29 = _mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_30 = _mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_31 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_29,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_30,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_1);
+                const __m256d tmp_kernel_op_32 = _mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_33 = _mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_34 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_32,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_33,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_0);
+                const __m256d tmp_kernel_op_35 = _mm256_mul_pd(tmp_kernel_op_34,tmp_kernel_op_34);
+                const __m256d tmp_kernel_op_36 = _mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_31);
+                const __m256d tmp_kernel_op_37 = _mm256_add_pd(tmp_kernel_op_35,tmp_kernel_op_36);
+                const __m256d tmp_kernel_op_41 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_37)),_mm256_set_pd(tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40));
+                const __m256d tmp_kernel_op_42 = _mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_41);
+                const __m256d tmp_kernel_op_43 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_37),_mm256_mul_pd(tmp_kernel_op_37,tmp_kernel_op_37));
+                const __m256d tmp_kernel_op_46 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45),tmp_kernel_op_34),_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44),tmp_kernel_op_31),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28))),_mm256_set_pd(tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_47 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_35,tmp_kernel_op_43),tmp_kernel_op_46),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_48 = _mm256_mul_pd(tmp_kernel_op_34,tmp_kernel_op_41);
+                const __m256d tmp_kernel_op_49 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_43,tmp_kernel_op_46),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_50 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_42,_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_34),tmp_kernel_op_49),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_51 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_47,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_48,_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(tmp_kernel_op_36,tmp_kernel_op_49))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_50,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_48,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_34),tmp_kernel_op_43),tmp_kernel_op_46),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_52 = _mm256_mul_pd(tmp_kernel_op_47,tmp_kernel_op_51);
+                const __m256d tmp_kernel_op_53 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_50,tmp_kernel_op_51),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_54 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_52,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_53,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)));
+                const __m256d tmp_kernel_op_56 = _mm256_mul_pd(tmp_kernel_op_54,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55)));
+                const __m256d tmp_kernel_op_57 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_1);
+                const __m256d tmp_kernel_op_58 = _mm256_mul_pd(tmp_kernel_op_57,tmp_kernel_op_57);
+                const __m256d tmp_kernel_op_59 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_6);
+                const __m256d tmp_kernel_op_60 = _mm256_mul_pd(tmp_kernel_op_59,tmp_kernel_op_59);
+                const __m256d tmp_kernel_op_61 = _mm256_add_pd(tmp_kernel_op_58,tmp_kernel_op_60);
+                const __m256d tmp_kernel_op_62 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_61)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17));
+                const __m256d tmp_kernel_op_63 = _mm256_mul_pd(tmp_kernel_op_57,tmp_kernel_op_62);
+                const __m256d tmp_kernel_op_64 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_59),_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_57),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_65 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_61),_mm256_mul_pd(tmp_kernel_op_61,tmp_kernel_op_61)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_66 = _mm256_mul_pd(tmp_kernel_op_65,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_64,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_67 = _mm256_mul_pd(tmp_kernel_op_59,tmp_kernel_op_62);
+                const __m256d tmp_kernel_op_68 = _mm256_mul_pd(tmp_kernel_op_65,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_64,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_69 = _mm256_mul_pd(tmp_kernel_op_57,tmp_kernel_op_59);
+                const __m256d tmp_kernel_op_70 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_67,_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(tmp_kernel_op_58,tmp_kernel_op_68)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_63,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_60,tmp_kernel_op_66),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_63,_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_68,tmp_kernel_op_69),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_67,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_66,tmp_kernel_op_69))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_71 = _mm256_mul_pd(tmp_kernel_op_70,_mm256_set_pd(0.16666666666666671,0.16666666666666671,0.16666666666666671,0.16666666666666671));
+                const __m256d tmp_kernel_op_72 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_30,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_29,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_1);
+                const __m256d tmp_kernel_op_73 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_33,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_32,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_0);
+                const __m256d tmp_kernel_op_74 = _mm256_mul_pd(tmp_kernel_op_73,tmp_kernel_op_73);
+                const __m256d tmp_kernel_op_75 = _mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_72);
+                const __m256d tmp_kernel_op_76 = _mm256_add_pd(tmp_kernel_op_74,tmp_kernel_op_75);
+                const __m256d tmp_kernel_op_77 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_76)),_mm256_set_pd(tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40));
+                const __m256d tmp_kernel_op_78 = _mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_77);
+                const __m256d tmp_kernel_op_79 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_76),_mm256_mul_pd(tmp_kernel_op_76,tmp_kernel_op_76));
+                const __m256d tmp_kernel_op_80 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45),tmp_kernel_op_73),_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44),tmp_kernel_op_72),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28))),_mm256_set_pd(tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_81 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_78,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_74,tmp_kernel_op_79),tmp_kernel_op_80),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_82 = _mm256_mul_pd(tmp_kernel_op_73,tmp_kernel_op_77);
+                const __m256d tmp_kernel_op_83 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_79,tmp_kernel_op_80),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_84 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_78,_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_73),tmp_kernel_op_83),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_85 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_81,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_82,_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(tmp_kernel_op_75,tmp_kernel_op_83))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_84,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_82,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_73),tmp_kernel_op_79),tmp_kernel_op_80),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_86 = _mm256_mul_pd(tmp_kernel_op_81,tmp_kernel_op_85);
+                const __m256d tmp_kernel_op_87 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_84,tmp_kernel_op_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_88 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_86,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_87,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)));
+                const __m256d tmp_kernel_op_90 = _mm256_mul_pd(tmp_kernel_op_88,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89)));
+                const __m256d tmp_kernel_op_91 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),tmp_kernel_op_1);
+                const __m256d tmp_kernel_op_92 = _mm256_mul_pd(tmp_kernel_op_91,tmp_kernel_op_91);
+                const __m256d tmp_kernel_op_93 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),tmp_kernel_op_6);
+                const __m256d tmp_kernel_op_94 = _mm256_mul_pd(tmp_kernel_op_93,tmp_kernel_op_93);
+                const __m256d tmp_kernel_op_95 = _mm256_add_pd(tmp_kernel_op_92,tmp_kernel_op_94);
+                const __m256d tmp_kernel_op_96 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_95)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17));
+                const __m256d tmp_kernel_op_97 = _mm256_mul_pd(tmp_kernel_op_91,tmp_kernel_op_96);
+                const __m256d tmp_kernel_op_98 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_93),_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_99 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_95),_mm256_mul_pd(tmp_kernel_op_95,tmp_kernel_op_95)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_100 = _mm256_mul_pd(tmp_kernel_op_99,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_98,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_101 = _mm256_mul_pd(tmp_kernel_op_93,tmp_kernel_op_96);
+                const __m256d tmp_kernel_op_102 = _mm256_mul_pd(tmp_kernel_op_99,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_98,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_103 = _mm256_mul_pd(tmp_kernel_op_91,tmp_kernel_op_93);
+                const __m256d tmp_kernel_op_104 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_101,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_103)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_102,tmp_kernel_op_103),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_97,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_101,_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(tmp_kernel_op_102,tmp_kernel_op_92)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_97,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_94),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))))));
+                const __m256d tmp_kernel_op_105 = _mm256_mul_pd(tmp_kernel_op_104,_mm256_set_pd(0.66666666666666674,0.66666666666666674,0.66666666666666674,0.66666666666666674));
+                const __m256d tmp_kernel_op_106 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_29,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_30,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),p_affine_0_1);
+                const __m256d tmp_kernel_op_107 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_32,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_33,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),p_affine_0_0);
+                const __m256d tmp_kernel_op_108 = _mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_107);
+                const __m256d tmp_kernel_op_109 = _mm256_mul_pd(tmp_kernel_op_106,tmp_kernel_op_106);
+                const __m256d tmp_kernel_op_110 = _mm256_add_pd(tmp_kernel_op_108,tmp_kernel_op_109);
+                const __m256d tmp_kernel_op_111 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_110)),_mm256_set_pd(tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40));
+                const __m256d tmp_kernel_op_112 = _mm256_mul_pd(tmp_kernel_op_106,tmp_kernel_op_111);
+                const __m256d tmp_kernel_op_113 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_110),_mm256_mul_pd(tmp_kernel_op_110,tmp_kernel_op_110));
+                const __m256d tmp_kernel_op_114 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45),tmp_kernel_op_107),_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44),tmp_kernel_op_106),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28))),_mm256_set_pd(tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_115 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_112,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_113),tmp_kernel_op_114),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_116 = _mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_111);
+                const __m256d tmp_kernel_op_117 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_113,tmp_kernel_op_114),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_118 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_112,_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_106,tmp_kernel_op_107),tmp_kernel_op_117),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_119 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_115,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_116,_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(tmp_kernel_op_109,tmp_kernel_op_117))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_118,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_116,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_106,tmp_kernel_op_107),tmp_kernel_op_113),tmp_kernel_op_114),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_120 = _mm256_mul_pd(tmp_kernel_op_115,tmp_kernel_op_119);
+                const __m256d tmp_kernel_op_121 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_118,tmp_kernel_op_119),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_122 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_120,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_121,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)));
+                const __m256d tmp_kernel_op_124 = _mm256_mul_pd(tmp_kernel_op_122,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123)));
+                const __m256d tmp_kernel_op_125 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_52,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_53,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                const __m256d tmp_kernel_op_127 = _mm256_mul_pd(tmp_kernel_op_125,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126)));
+                const __m256d tmp_kernel_op_128 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_86,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_87,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                const __m256d tmp_kernel_op_130 = _mm256_mul_pd(tmp_kernel_op_128,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129)));
+                const __m256d tmp_kernel_op_131 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_120,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_121,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                const __m256d tmp_kernel_op_133 = _mm256_mul_pd(tmp_kernel_op_131,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132)));
+                const __m256d tmp_kernel_op_134 = _mm256_mul_pd(tmp_kernel_op_125,_mm256_set_pd(tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55));
+                const __m256d tmp_kernel_op_135 = _mm256_mul_pd(tmp_kernel_op_54,_mm256_set_pd(tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126));
+                const __m256d tmp_kernel_op_136 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_134,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_135,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_137 = _mm256_mul_pd(tmp_kernel_op_128,_mm256_set_pd(tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89));
+                const __m256d tmp_kernel_op_138 = _mm256_mul_pd(tmp_kernel_op_88,_mm256_set_pd(tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129));
+                const __m256d tmp_kernel_op_139 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_137,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_138,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_140 = _mm256_mul_pd(tmp_kernel_op_131,_mm256_set_pd(tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123));
+                const __m256d tmp_kernel_op_141 = _mm256_mul_pd(tmp_kernel_op_122,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132));
+                const __m256d tmp_kernel_op_142 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_140,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_141,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_143 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_125,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55)),_mm256_set_pd(-1.333333333333333,-1.333333333333333,-1.333333333333333,-1.333333333333333))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_135);
+                const __m256d tmp_kernel_op_144 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_128,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_138);
+                const __m256d tmp_kernel_op_145 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_131,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_141);
+                const __m256d tmp_kernel_op_146 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_54,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_134);
+                const __m256d tmp_kernel_op_147 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_88,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129)),_mm256_set_pd(-1.333333333333333,-1.333333333333333,-1.333333333333333,-1.333333333333333))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_137);
+                const __m256d tmp_kernel_op_148 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_122,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_140);
+                const __m256d tmp_kernel_op_150 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_125,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_54,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149)));
+                const __m256d tmp_kernel_op_152 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_128,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_151,tmp_kernel_op_151,tmp_kernel_op_151,tmp_kernel_op_151)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_88,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_151,tmp_kernel_op_151,tmp_kernel_op_151,tmp_kernel_op_151)));
+                const __m256d tmp_kernel_op_154 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_153,tmp_kernel_op_153,tmp_kernel_op_153,tmp_kernel_op_153)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_131,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_153,tmp_kernel_op_153,tmp_kernel_op_153,tmp_kernel_op_153)));
+                const __m256d tmp_kernel_op_155 = _mm256_mul_pd(tmp_kernel_op_26,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666));
+                const __m256d tmp_kernel_op_156 = _mm256_mul_pd(tmp_kernel_op_70,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663));
+                const __m256d tmp_kernel_op_157 = _mm256_mul_pd(tmp_kernel_op_104,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666));
+                const __m256d tmp_kernel_op_158 = _mm256_mul_pd(tmp_kernel_op_26,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663));
+                const __m256d tmp_kernel_op_159 = _mm256_mul_pd(tmp_kernel_op_70,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666));
+                const __m256d tmp_kernel_op_160 = _mm256_mul_pd(tmp_kernel_op_104,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666));
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_3,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_142),_mm256_mul_pd(tmp_kernel_op_136,tmp_kernel_op_27)),_mm256_mul_pd(tmp_kernel_op_139,tmp_kernel_op_71))),_mm256_mul_pd(src_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_145),_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_27)),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_71)))),_mm256_mul_pd(src_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_148),_mm256_mul_pd(tmp_kernel_op_146,tmp_kernel_op_27)),_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_71)))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_154),_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_27)),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_71)))),_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_124),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_27,tmp_kernel_op_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_71,tmp_kernel_op_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_133),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_127,tmp_kernel_op_27),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_130,tmp_kernel_op_71),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_3,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_136,tmp_kernel_op_155),_mm256_mul_pd(tmp_kernel_op_139,tmp_kernel_op_156)),_mm256_mul_pd(tmp_kernel_op_142,tmp_kernel_op_157))),_mm256_mul_pd(src_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_155),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_156)),_mm256_mul_pd(tmp_kernel_op_145,tmp_kernel_op_157)))),_mm256_mul_pd(src_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_146,tmp_kernel_op_155),_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_156)),_mm256_mul_pd(tmp_kernel_op_148,tmp_kernel_op_157)))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_155),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_156)),_mm256_mul_pd(tmp_kernel_op_154,tmp_kernel_op_157)))),_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_124,tmp_kernel_op_157),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_155,tmp_kernel_op_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_156,tmp_kernel_op_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_127,tmp_kernel_op_155),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_130,tmp_kernel_op_156),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_133,tmp_kernel_op_157),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_3,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_136,tmp_kernel_op_158),_mm256_mul_pd(tmp_kernel_op_139,tmp_kernel_op_159)),_mm256_mul_pd(tmp_kernel_op_142,tmp_kernel_op_160))),_mm256_mul_pd(src_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_158),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_159)),_mm256_mul_pd(tmp_kernel_op_145,tmp_kernel_op_160)))),_mm256_mul_pd(src_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_146,tmp_kernel_op_158),_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_159)),_mm256_mul_pd(tmp_kernel_op_148,tmp_kernel_op_160)))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_158),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_159)),_mm256_mul_pd(tmp_kernel_op_154,tmp_kernel_op_160)))),_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_124,tmp_kernel_op_160),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_158,tmp_kernel_op_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_159,tmp_kernel_op_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_127,tmp_kernel_op_158),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_130,tmp_kernel_op_159),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_133,tmp_kernel_op_160),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t tmp_kernel_op_1 = -p_affine_0_0;
+                const real_t tmp_kernel_op_2 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_kernel_op_3 = p_affine_0_0 - p_affine_2_0;
+                const real_t tmp_kernel_op_4 = tmp_kernel_op_1 + tmp_kernel_op_2*0.16666666666666666 + tmp_kernel_op_3*0.66666666666666663;
+                const real_t tmp_kernel_op_5 = (tmp_kernel_op_4*tmp_kernel_op_4);
+                const real_t tmp_kernel_op_6 = -p_affine_0_1;
+                const real_t tmp_kernel_op_7 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_kernel_op_8 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_kernel_op_9 = tmp_kernel_op_6 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+                const real_t tmp_kernel_op_10 = (tmp_kernel_op_9*tmp_kernel_op_9);
+                const real_t tmp_kernel_op_11 = tmp_kernel_op_10 + tmp_kernel_op_5;
+                const real_t tmp_kernel_op_18 = pow(tmp_kernel_op_11, -0.50000000000000000)*tmp_kernel_op_17;
+                const real_t tmp_kernel_op_19 = tmp_kernel_op_18*tmp_kernel_op_4;
+                const real_t tmp_kernel_op_20 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_4) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_9);
+                const real_t tmp_kernel_op_21 = pow(tmp_kernel_op_11, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_22 = tmp_kernel_op_21*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_20);
+                const real_t tmp_kernel_op_23 = tmp_kernel_op_18*tmp_kernel_op_9;
+                const real_t tmp_kernel_op_24 = tmp_kernel_op_21*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_20);
+                const real_t tmp_kernel_op_25 = tmp_kernel_op_4*tmp_kernel_op_9;
+                const real_t tmp_kernel_op_26 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_19 - tmp_kernel_op_10*tmp_kernel_op_22)*(tmp_kernel_op_13*tmp_kernel_op_23 + tmp_kernel_op_24*tmp_kernel_op_5) - (tmp_kernel_op_0*tmp_kernel_op_23 + tmp_kernel_op_22*tmp_kernel_op_25)*(tmp_kernel_op_13*tmp_kernel_op_19 - tmp_kernel_op_24*tmp_kernel_op_25));
+                const real_t tmp_kernel_op_27 = tmp_kernel_op_26*0.16666666666666674;
+                const real_t tmp_kernel_op_29 = -tmp_kernel_op_7;
+                const real_t tmp_kernel_op_30 = -tmp_kernel_op_8;
+                const real_t tmp_kernel_op_31 = p_affine_0_1 + tmp_kernel_op_29*0.16666666666666666 + tmp_kernel_op_30*0.66666666666666663;
+                const real_t tmp_kernel_op_32 = -tmp_kernel_op_2;
+                const real_t tmp_kernel_op_33 = -tmp_kernel_op_3;
+                const real_t tmp_kernel_op_34 = p_affine_0_0 + tmp_kernel_op_32*0.16666666666666666 + tmp_kernel_op_33*0.66666666666666663;
+                const real_t tmp_kernel_op_35 = (tmp_kernel_op_34*tmp_kernel_op_34);
+                const real_t tmp_kernel_op_36 = (tmp_kernel_op_31*tmp_kernel_op_31);
+                const real_t tmp_kernel_op_37 = tmp_kernel_op_35 + tmp_kernel_op_36;
+                const real_t tmp_kernel_op_41 = pow(tmp_kernel_op_37, -0.50000000000000000)*tmp_kernel_op_40;
+                const real_t tmp_kernel_op_42 = tmp_kernel_op_31*tmp_kernel_op_41;
+                const real_t tmp_kernel_op_43 = pow(tmp_kernel_op_37, -1.5000000000000000);
+                const real_t tmp_kernel_op_46 = radRayVertex + tmp_kernel_op_39*(-tmp_kernel_op_28*(tmp_kernel_op_31 + tmp_kernel_op_44) + tmp_kernel_op_38*(tmp_kernel_op_34 + tmp_kernel_op_45));
+                const real_t tmp_kernel_op_47 = -tmp_kernel_op_28*tmp_kernel_op_42 + tmp_kernel_op_35*tmp_kernel_op_43*tmp_kernel_op_46*1.0;
+                const real_t tmp_kernel_op_48 = tmp_kernel_op_34*tmp_kernel_op_41;
+                const real_t tmp_kernel_op_49 = tmp_kernel_op_43*tmp_kernel_op_46*1.0;
+                const real_t tmp_kernel_op_50 = -tmp_kernel_op_31*tmp_kernel_op_34*tmp_kernel_op_49 + tmp_kernel_op_38*tmp_kernel_op_42;
+                const real_t tmp_kernel_op_51 = 1.0 / (tmp_kernel_op_47*(tmp_kernel_op_36*tmp_kernel_op_49 + tmp_kernel_op_38*tmp_kernel_op_48) - tmp_kernel_op_50*(-tmp_kernel_op_28*tmp_kernel_op_48 - tmp_kernel_op_31*tmp_kernel_op_34*tmp_kernel_op_43*tmp_kernel_op_46));
+                const real_t tmp_kernel_op_52 = tmp_kernel_op_47*tmp_kernel_op_51;
+                const real_t tmp_kernel_op_53 = -tmp_kernel_op_50*tmp_kernel_op_51;
+                const real_t tmp_kernel_op_54 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_52 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_53;
+                const real_t tmp_kernel_op_56 = tmp_kernel_op_54*(tmp_kernel_op_55 - 1.0);
+                const real_t tmp_kernel_op_57 = tmp_kernel_op_1 + tmp_kernel_op_2*0.66666666666666663 + tmp_kernel_op_3*0.16666666666666666;
+                const real_t tmp_kernel_op_58 = (tmp_kernel_op_57*tmp_kernel_op_57);
+                const real_t tmp_kernel_op_59 = tmp_kernel_op_6 + tmp_kernel_op_7*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+                const real_t tmp_kernel_op_60 = (tmp_kernel_op_59*tmp_kernel_op_59);
+                const real_t tmp_kernel_op_61 = tmp_kernel_op_58 + tmp_kernel_op_60;
+                const real_t tmp_kernel_op_62 = tmp_kernel_op_17*pow(tmp_kernel_op_61, -0.50000000000000000);
+                const real_t tmp_kernel_op_63 = tmp_kernel_op_57*tmp_kernel_op_62;
+                const real_t tmp_kernel_op_64 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_57) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_59);
+                const real_t tmp_kernel_op_65 = pow(tmp_kernel_op_61, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_66 = tmp_kernel_op_65*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_64);
+                const real_t tmp_kernel_op_67 = tmp_kernel_op_59*tmp_kernel_op_62;
+                const real_t tmp_kernel_op_68 = tmp_kernel_op_65*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_64);
+                const real_t tmp_kernel_op_69 = tmp_kernel_op_57*tmp_kernel_op_59;
+                const real_t tmp_kernel_op_70 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_63 - tmp_kernel_op_60*tmp_kernel_op_66)*(tmp_kernel_op_13*tmp_kernel_op_67 + tmp_kernel_op_58*tmp_kernel_op_68) - (tmp_kernel_op_0*tmp_kernel_op_67 + tmp_kernel_op_66*tmp_kernel_op_69)*(tmp_kernel_op_13*tmp_kernel_op_63 - tmp_kernel_op_68*tmp_kernel_op_69));
+                const real_t tmp_kernel_op_71 = tmp_kernel_op_70*0.16666666666666671;
+                const real_t tmp_kernel_op_72 = p_affine_0_1 + tmp_kernel_op_29*0.66666666666666663 + tmp_kernel_op_30*0.16666666666666666;
+                const real_t tmp_kernel_op_73 = p_affine_0_0 + tmp_kernel_op_32*0.66666666666666663 + tmp_kernel_op_33*0.16666666666666666;
+                const real_t tmp_kernel_op_74 = (tmp_kernel_op_73*tmp_kernel_op_73);
+                const real_t tmp_kernel_op_75 = (tmp_kernel_op_72*tmp_kernel_op_72);
+                const real_t tmp_kernel_op_76 = tmp_kernel_op_74 + tmp_kernel_op_75;
+                const real_t tmp_kernel_op_77 = tmp_kernel_op_40*pow(tmp_kernel_op_76, -0.50000000000000000);
+                const real_t tmp_kernel_op_78 = tmp_kernel_op_72*tmp_kernel_op_77;
+                const real_t tmp_kernel_op_79 = pow(tmp_kernel_op_76, -1.5000000000000000);
+                const real_t tmp_kernel_op_80 = radRayVertex + tmp_kernel_op_39*(-tmp_kernel_op_28*(tmp_kernel_op_44 + tmp_kernel_op_72) + tmp_kernel_op_38*(tmp_kernel_op_45 + tmp_kernel_op_73));
+                const real_t tmp_kernel_op_81 = -tmp_kernel_op_28*tmp_kernel_op_78 + tmp_kernel_op_74*tmp_kernel_op_79*tmp_kernel_op_80*1.0;
+                const real_t tmp_kernel_op_82 = tmp_kernel_op_73*tmp_kernel_op_77;
+                const real_t tmp_kernel_op_83 = tmp_kernel_op_79*tmp_kernel_op_80*1.0;
+                const real_t tmp_kernel_op_84 = tmp_kernel_op_38*tmp_kernel_op_78 - tmp_kernel_op_72*tmp_kernel_op_73*tmp_kernel_op_83;
+                const real_t tmp_kernel_op_85 = 1.0 / (tmp_kernel_op_81*(tmp_kernel_op_38*tmp_kernel_op_82 + tmp_kernel_op_75*tmp_kernel_op_83) - tmp_kernel_op_84*(-tmp_kernel_op_28*tmp_kernel_op_82 - tmp_kernel_op_72*tmp_kernel_op_73*tmp_kernel_op_79*tmp_kernel_op_80));
+                const real_t tmp_kernel_op_86 = tmp_kernel_op_81*tmp_kernel_op_85;
+                const real_t tmp_kernel_op_87 = -tmp_kernel_op_84*tmp_kernel_op_85;
+                const real_t tmp_kernel_op_88 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_86 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_87;
+                const real_t tmp_kernel_op_90 = tmp_kernel_op_88*(tmp_kernel_op_89 - 1.0);
+                const real_t tmp_kernel_op_91 = tmp_kernel_op_1 + tmp_kernel_op_2*0.16666666666666666 + tmp_kernel_op_3*0.16666666666666666;
+                const real_t tmp_kernel_op_92 = (tmp_kernel_op_91*tmp_kernel_op_91);
+                const real_t tmp_kernel_op_93 = tmp_kernel_op_6 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+                const real_t tmp_kernel_op_94 = (tmp_kernel_op_93*tmp_kernel_op_93);
+                const real_t tmp_kernel_op_95 = tmp_kernel_op_92 + tmp_kernel_op_94;
+                const real_t tmp_kernel_op_96 = tmp_kernel_op_17*pow(tmp_kernel_op_95, -0.50000000000000000);
+                const real_t tmp_kernel_op_97 = tmp_kernel_op_91*tmp_kernel_op_96;
+                const real_t tmp_kernel_op_98 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_91) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_93);
+                const real_t tmp_kernel_op_99 = pow(tmp_kernel_op_95, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_100 = tmp_kernel_op_99*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_98);
+                const real_t tmp_kernel_op_101 = tmp_kernel_op_93*tmp_kernel_op_96;
+                const real_t tmp_kernel_op_102 = tmp_kernel_op_99*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_98);
+                const real_t tmp_kernel_op_103 = tmp_kernel_op_91*tmp_kernel_op_93;
+                const real_t tmp_kernel_op_104 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_101 + tmp_kernel_op_100*tmp_kernel_op_103)*(tmp_kernel_op_102*tmp_kernel_op_103 - tmp_kernel_op_13*tmp_kernel_op_97) + (tmp_kernel_op_0*tmp_kernel_op_97 - tmp_kernel_op_100*tmp_kernel_op_94)*(tmp_kernel_op_101*tmp_kernel_op_13 + tmp_kernel_op_102*tmp_kernel_op_92));
+                const real_t tmp_kernel_op_105 = tmp_kernel_op_104*0.66666666666666674;
+                const real_t tmp_kernel_op_106 = p_affine_0_1 + tmp_kernel_op_29*0.16666666666666666 + tmp_kernel_op_30*0.16666666666666666;
+                const real_t tmp_kernel_op_107 = p_affine_0_0 + tmp_kernel_op_32*0.16666666666666666 + tmp_kernel_op_33*0.16666666666666666;
+                const real_t tmp_kernel_op_108 = (tmp_kernel_op_107*tmp_kernel_op_107);
+                const real_t tmp_kernel_op_109 = (tmp_kernel_op_106*tmp_kernel_op_106);
+                const real_t tmp_kernel_op_110 = tmp_kernel_op_108 + tmp_kernel_op_109;
+                const real_t tmp_kernel_op_111 = pow(tmp_kernel_op_110, -0.50000000000000000)*tmp_kernel_op_40;
+                const real_t tmp_kernel_op_112 = tmp_kernel_op_106*tmp_kernel_op_111;
+                const real_t tmp_kernel_op_113 = pow(tmp_kernel_op_110, -1.5000000000000000);
+                const real_t tmp_kernel_op_114 = radRayVertex + tmp_kernel_op_39*(-tmp_kernel_op_28*(tmp_kernel_op_106 + tmp_kernel_op_44) + tmp_kernel_op_38*(tmp_kernel_op_107 + tmp_kernel_op_45));
+                const real_t tmp_kernel_op_115 = tmp_kernel_op_108*tmp_kernel_op_113*tmp_kernel_op_114*1.0 - tmp_kernel_op_112*tmp_kernel_op_28;
+                const real_t tmp_kernel_op_116 = tmp_kernel_op_107*tmp_kernel_op_111;
+                const real_t tmp_kernel_op_117 = tmp_kernel_op_113*tmp_kernel_op_114*1.0;
+                const real_t tmp_kernel_op_118 = -tmp_kernel_op_106*tmp_kernel_op_107*tmp_kernel_op_117 + tmp_kernel_op_112*tmp_kernel_op_38;
+                const real_t tmp_kernel_op_119 = 1.0 / (tmp_kernel_op_115*(tmp_kernel_op_109*tmp_kernel_op_117 + tmp_kernel_op_116*tmp_kernel_op_38) - tmp_kernel_op_118*(-tmp_kernel_op_106*tmp_kernel_op_107*tmp_kernel_op_113*tmp_kernel_op_114 - tmp_kernel_op_116*tmp_kernel_op_28));
+                const real_t tmp_kernel_op_120 = tmp_kernel_op_115*tmp_kernel_op_119;
+                const real_t tmp_kernel_op_121 = -tmp_kernel_op_118*tmp_kernel_op_119;
+                const real_t tmp_kernel_op_122 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_120 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_121;
+                const real_t tmp_kernel_op_124 = tmp_kernel_op_122*(tmp_kernel_op_123 - 1.0);
+                const real_t tmp_kernel_op_125 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_52 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_53;
+                const real_t tmp_kernel_op_127 = tmp_kernel_op_125*(tmp_kernel_op_126 - 1.0);
+                const real_t tmp_kernel_op_128 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_86 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_87;
+                const real_t tmp_kernel_op_130 = tmp_kernel_op_128*(tmp_kernel_op_129 - 1.0);
+                const real_t tmp_kernel_op_131 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_120 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_121;
+                const real_t tmp_kernel_op_133 = tmp_kernel_op_131*(tmp_kernel_op_132 - 1.0);
+                const real_t tmp_kernel_op_134 = tmp_kernel_op_125*tmp_kernel_op_55;
+                const real_t tmp_kernel_op_135 = tmp_kernel_op_126*tmp_kernel_op_54;
+                const real_t tmp_kernel_op_136 = -tmp_kernel_op_134 - tmp_kernel_op_135;
+                const real_t tmp_kernel_op_137 = tmp_kernel_op_128*tmp_kernel_op_89;
+                const real_t tmp_kernel_op_138 = tmp_kernel_op_129*tmp_kernel_op_88;
+                const real_t tmp_kernel_op_139 = -tmp_kernel_op_137 - tmp_kernel_op_138;
+                const real_t tmp_kernel_op_140 = tmp_kernel_op_123*tmp_kernel_op_131;
+                const real_t tmp_kernel_op_141 = tmp_kernel_op_122*tmp_kernel_op_132;
+                const real_t tmp_kernel_op_142 = -tmp_kernel_op_140 - tmp_kernel_op_141;
+                const real_t tmp_kernel_op_143 = -tmp_kernel_op_125*(-tmp_kernel_op_55 - 1.333333333333333) + tmp_kernel_op_135;
+                const real_t tmp_kernel_op_144 = -tmp_kernel_op_128*(-tmp_kernel_op_89 + 2.666666666666667) + tmp_kernel_op_138;
+                const real_t tmp_kernel_op_145 = -tmp_kernel_op_131*(-tmp_kernel_op_123 + 2.666666666666667) + tmp_kernel_op_141;
+                const real_t tmp_kernel_op_146 = tmp_kernel_op_134 - tmp_kernel_op_54*(-tmp_kernel_op_126 + 2.666666666666667);
+                const real_t tmp_kernel_op_147 = tmp_kernel_op_137 - tmp_kernel_op_88*(-tmp_kernel_op_129 - 1.333333333333333);
+                const real_t tmp_kernel_op_148 = -tmp_kernel_op_122*(-tmp_kernel_op_132 + 2.666666666666667) + tmp_kernel_op_140;
+                const real_t tmp_kernel_op_150 = -tmp_kernel_op_125*tmp_kernel_op_149 - tmp_kernel_op_149*tmp_kernel_op_54;
+                const real_t tmp_kernel_op_152 = -tmp_kernel_op_128*tmp_kernel_op_151 - tmp_kernel_op_151*tmp_kernel_op_88;
+                const real_t tmp_kernel_op_154 = -tmp_kernel_op_122*tmp_kernel_op_153 - tmp_kernel_op_131*tmp_kernel_op_153;
+                const real_t tmp_kernel_op_155 = tmp_kernel_op_26*0.16666666666666666;
+                const real_t tmp_kernel_op_156 = tmp_kernel_op_70*0.66666666666666663;
+                const real_t tmp_kernel_op_157 = tmp_kernel_op_104*0.16666666666666666;
+                const real_t tmp_kernel_op_158 = tmp_kernel_op_26*0.66666666666666663;
+                const real_t tmp_kernel_op_159 = tmp_kernel_op_70*0.16666666666666666;
+                const real_t tmp_kernel_op_160 = tmp_kernel_op_104*0.16666666666666666;
+                const real_t elMatVec_0 = src_dof_0*(tmp_kernel_op_105*tmp_kernel_op_154 + tmp_kernel_op_150*tmp_kernel_op_27 + tmp_kernel_op_152*tmp_kernel_op_71) + src_dof_1*(-tmp_kernel_op_105*tmp_kernel_op_124 - tmp_kernel_op_27*tmp_kernel_op_56 - tmp_kernel_op_71*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_105*tmp_kernel_op_133 - tmp_kernel_op_127*tmp_kernel_op_27 - tmp_kernel_op_130*tmp_kernel_op_71) + src_dof_3*(tmp_kernel_op_105*tmp_kernel_op_142 + tmp_kernel_op_136*tmp_kernel_op_27 + tmp_kernel_op_139*tmp_kernel_op_71) + src_dof_4*(tmp_kernel_op_105*tmp_kernel_op_145 + tmp_kernel_op_143*tmp_kernel_op_27 + tmp_kernel_op_144*tmp_kernel_op_71) + src_dof_5*(tmp_kernel_op_105*tmp_kernel_op_148 + tmp_kernel_op_146*tmp_kernel_op_27 + tmp_kernel_op_147*tmp_kernel_op_71);
+                const real_t elMatVec_1 = src_dof_0*(tmp_kernel_op_150*tmp_kernel_op_155 + tmp_kernel_op_152*tmp_kernel_op_156 + tmp_kernel_op_154*tmp_kernel_op_157) + src_dof_1*(-tmp_kernel_op_124*tmp_kernel_op_157 - tmp_kernel_op_155*tmp_kernel_op_56 - tmp_kernel_op_156*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_127*tmp_kernel_op_155 - tmp_kernel_op_130*tmp_kernel_op_156 - tmp_kernel_op_133*tmp_kernel_op_157) + src_dof_3*(tmp_kernel_op_136*tmp_kernel_op_155 + tmp_kernel_op_139*tmp_kernel_op_156 + tmp_kernel_op_142*tmp_kernel_op_157) + src_dof_4*(tmp_kernel_op_143*tmp_kernel_op_155 + tmp_kernel_op_144*tmp_kernel_op_156 + tmp_kernel_op_145*tmp_kernel_op_157) + src_dof_5*(tmp_kernel_op_146*tmp_kernel_op_155 + tmp_kernel_op_147*tmp_kernel_op_156 + tmp_kernel_op_148*tmp_kernel_op_157);
+                const real_t elMatVec_2 = src_dof_0*(tmp_kernel_op_150*tmp_kernel_op_158 + tmp_kernel_op_152*tmp_kernel_op_159 + tmp_kernel_op_154*tmp_kernel_op_160) + src_dof_1*(-tmp_kernel_op_124*tmp_kernel_op_160 - tmp_kernel_op_158*tmp_kernel_op_56 - tmp_kernel_op_159*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_127*tmp_kernel_op_158 - tmp_kernel_op_130*tmp_kernel_op_159 - tmp_kernel_op_133*tmp_kernel_op_160) + src_dof_3*(tmp_kernel_op_136*tmp_kernel_op_158 + tmp_kernel_op_139*tmp_kernel_op_159 + tmp_kernel_op_142*tmp_kernel_op_160) + src_dof_4*(tmp_kernel_op_143*tmp_kernel_op_158 + tmp_kernel_op_144*tmp_kernel_op_159 + tmp_kernel_op_145*tmp_kernel_op_160) + src_dof_5*(tmp_kernel_op_146*tmp_kernel_op_158 + tmp_kernel_op_147*tmp_kernel_op_159 + tmp_kernel_op_148*tmp_kernel_op_160);
+                _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2a3399f7bb0c8bef274aac855363c863f521c53f
--- /dev/null
+++ b/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp
@@ -0,0 +1,834 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ToP1ElementwiseDivergenceAnnulusMap_0_1.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_kernel_op_0 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_kernel_op_12 = rayVertex_0 - refVertex_0;
+       const real_t tmp_kernel_op_13 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_kernel_op_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_kernel_op_15 = radRayVertex - radRefVertex;
+       const real_t tmp_kernel_op_16 = tmp_kernel_op_15*1.0 / (tmp_kernel_op_0*tmp_kernel_op_12 - tmp_kernel_op_13*tmp_kernel_op_14);
+       const real_t tmp_kernel_op_17 = tmp_kernel_op_16*1.0;
+       const real_t tmp_kernel_op_28 = -tmp_kernel_op_0;
+       const real_t tmp_kernel_op_38 = -tmp_kernel_op_13;
+       const real_t tmp_kernel_op_39 = -tmp_kernel_op_15*1.0 / (-tmp_kernel_op_12*tmp_kernel_op_28 + tmp_kernel_op_14*tmp_kernel_op_38);
+       const real_t tmp_kernel_op_40 = tmp_kernel_op_39*1.0;
+       const real_t tmp_kernel_op_44 = -rayVertex_1;
+       const real_t tmp_kernel_op_45 = -rayVertex_0;
+       const real_t tmp_kernel_op_55 = 0.66666666666666663;
+       const real_t tmp_kernel_op_89 = 2.6666666666666665;
+       const real_t tmp_kernel_op_123 = 0.66666666666666663;
+       const real_t tmp_kernel_op_126 = 2.6666666666666665;
+       const real_t tmp_kernel_op_129 = 0.66666666666666663;
+       const real_t tmp_kernel_op_132 = 0.66666666666666663;
+       const real_t tmp_kernel_op_149 = tmp_kernel_op_126 + tmp_kernel_op_55 - 3.0;
+       const real_t tmp_kernel_op_151 = tmp_kernel_op_129 + tmp_kernel_op_89 - 3.0;
+       const real_t tmp_kernel_op_153 = tmp_kernel_op_123 + tmp_kernel_op_132 - 3.0;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d tmp_kernel_op_1 = _mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_1);
+                const __m256d tmp_kernel_op_5 = _mm256_mul_pd(tmp_kernel_op_4,tmp_kernel_op_4);
+                const __m256d tmp_kernel_op_6 = _mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_7 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_8 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_9 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_6);
+                const __m256d tmp_kernel_op_10 = _mm256_mul_pd(tmp_kernel_op_9,tmp_kernel_op_9);
+                const __m256d tmp_kernel_op_11 = _mm256_add_pd(tmp_kernel_op_10,tmp_kernel_op_5);
+                const __m256d tmp_kernel_op_18 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_11)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17));
+                const __m256d tmp_kernel_op_19 = _mm256_mul_pd(tmp_kernel_op_18,tmp_kernel_op_4);
+                const __m256d tmp_kernel_op_20 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_9),_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_4),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_21 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_11),_mm256_mul_pd(tmp_kernel_op_11,tmp_kernel_op_11)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_22 = _mm256_mul_pd(tmp_kernel_op_21,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_20,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_23 = _mm256_mul_pd(tmp_kernel_op_18,tmp_kernel_op_9);
+                const __m256d tmp_kernel_op_24 = _mm256_mul_pd(tmp_kernel_op_21,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_20,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_25 = _mm256_mul_pd(tmp_kernel_op_4,tmp_kernel_op_9);
+                const __m256d tmp_kernel_op_26 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_23,_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(tmp_kernel_op_24,tmp_kernel_op_5)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_19,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_10,tmp_kernel_op_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_19,_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_24,tmp_kernel_op_25),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_23,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_22,tmp_kernel_op_25))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_27 = _mm256_mul_pd(tmp_kernel_op_26,_mm256_set_pd(0.16666666666666674,0.16666666666666674,0.16666666666666674,0.16666666666666674));
+                const __m256d tmp_kernel_op_29 = _mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_30 = _mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_31 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_29,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_30,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_0);
+                const __m256d tmp_kernel_op_32 = _mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_31);
+                const __m256d tmp_kernel_op_33 = _mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_34 = _mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_35 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_33,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_34,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_1);
+                const __m256d tmp_kernel_op_36 = _mm256_mul_pd(tmp_kernel_op_35,tmp_kernel_op_35);
+                const __m256d tmp_kernel_op_37 = _mm256_add_pd(tmp_kernel_op_32,tmp_kernel_op_36);
+                const __m256d tmp_kernel_op_41 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_37)),_mm256_set_pd(tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40));
+                const __m256d tmp_kernel_op_42 = _mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_41);
+                const __m256d tmp_kernel_op_43 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_37),_mm256_mul_pd(tmp_kernel_op_37,tmp_kernel_op_37));
+                const __m256d tmp_kernel_op_46 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45),tmp_kernel_op_31),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44),tmp_kernel_op_35),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38))),_mm256_set_pd(tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_47 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_43,tmp_kernel_op_46),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_48 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_42,_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(tmp_kernel_op_36,tmp_kernel_op_47));
+                const __m256d tmp_kernel_op_49 = _mm256_mul_pd(tmp_kernel_op_35,tmp_kernel_op_41);
+                const __m256d tmp_kernel_op_50 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_42,_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_35),tmp_kernel_op_43),tmp_kernel_op_46),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_51 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_50,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_49,_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_35),tmp_kernel_op_47),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(tmp_kernel_op_48,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_49,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_32,tmp_kernel_op_43),tmp_kernel_op_46),_mm256_set_pd(1.0,1.0,1.0,1.0))))));
+                const __m256d tmp_kernel_op_52 = _mm256_mul_pd(tmp_kernel_op_48,tmp_kernel_op_51);
+                const __m256d tmp_kernel_op_53 = _mm256_mul_pd(tmp_kernel_op_50,tmp_kernel_op_51);
+                const __m256d tmp_kernel_op_54 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_53,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_52,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)));
+                const __m256d tmp_kernel_op_56 = _mm256_mul_pd(tmp_kernel_op_54,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55)));
+                const __m256d tmp_kernel_op_57 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_1);
+                const __m256d tmp_kernel_op_58 = _mm256_mul_pd(tmp_kernel_op_57,tmp_kernel_op_57);
+                const __m256d tmp_kernel_op_59 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_6);
+                const __m256d tmp_kernel_op_60 = _mm256_mul_pd(tmp_kernel_op_59,tmp_kernel_op_59);
+                const __m256d tmp_kernel_op_61 = _mm256_add_pd(tmp_kernel_op_58,tmp_kernel_op_60);
+                const __m256d tmp_kernel_op_62 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_61)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17));
+                const __m256d tmp_kernel_op_63 = _mm256_mul_pd(tmp_kernel_op_57,tmp_kernel_op_62);
+                const __m256d tmp_kernel_op_64 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_59),_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_57),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_65 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_61),_mm256_mul_pd(tmp_kernel_op_61,tmp_kernel_op_61)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_66 = _mm256_mul_pd(tmp_kernel_op_65,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_64,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_67 = _mm256_mul_pd(tmp_kernel_op_59,tmp_kernel_op_62);
+                const __m256d tmp_kernel_op_68 = _mm256_mul_pd(tmp_kernel_op_65,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_64,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_69 = _mm256_mul_pd(tmp_kernel_op_57,tmp_kernel_op_59);
+                const __m256d tmp_kernel_op_70 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_67,_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(tmp_kernel_op_58,tmp_kernel_op_68)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_63,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_60,tmp_kernel_op_66),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_63,_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_68,tmp_kernel_op_69),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_67,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_66,tmp_kernel_op_69))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_71 = _mm256_mul_pd(tmp_kernel_op_70,_mm256_set_pd(0.16666666666666671,0.16666666666666671,0.16666666666666671,0.16666666666666671));
+                const __m256d tmp_kernel_op_72 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_30,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_29,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_0);
+                const __m256d tmp_kernel_op_73 = _mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_72);
+                const __m256d tmp_kernel_op_74 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_34,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_33,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_1);
+                const __m256d tmp_kernel_op_75 = _mm256_mul_pd(tmp_kernel_op_74,tmp_kernel_op_74);
+                const __m256d tmp_kernel_op_76 = _mm256_add_pd(tmp_kernel_op_73,tmp_kernel_op_75);
+                const __m256d tmp_kernel_op_77 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_76)),_mm256_set_pd(tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40));
+                const __m256d tmp_kernel_op_78 = _mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_77);
+                const __m256d tmp_kernel_op_79 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_76),_mm256_mul_pd(tmp_kernel_op_76,tmp_kernel_op_76));
+                const __m256d tmp_kernel_op_80 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45),tmp_kernel_op_72),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44),tmp_kernel_op_74),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38))),_mm256_set_pd(tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_81 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_79,tmp_kernel_op_80),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_82 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_78,_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(tmp_kernel_op_75,tmp_kernel_op_81));
+                const __m256d tmp_kernel_op_83 = _mm256_mul_pd(tmp_kernel_op_74,tmp_kernel_op_77);
+                const __m256d tmp_kernel_op_84 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_78,_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_74),tmp_kernel_op_79),tmp_kernel_op_80),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_85 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_84,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_83,_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_74),tmp_kernel_op_81),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(tmp_kernel_op_82,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_83,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_73,tmp_kernel_op_79),tmp_kernel_op_80),_mm256_set_pd(1.0,1.0,1.0,1.0))))));
+                const __m256d tmp_kernel_op_86 = _mm256_mul_pd(tmp_kernel_op_82,tmp_kernel_op_85);
+                const __m256d tmp_kernel_op_87 = _mm256_mul_pd(tmp_kernel_op_84,tmp_kernel_op_85);
+                const __m256d tmp_kernel_op_88 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_87,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_86,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)));
+                const __m256d tmp_kernel_op_90 = _mm256_mul_pd(tmp_kernel_op_88,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89)));
+                const __m256d tmp_kernel_op_91 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),tmp_kernel_op_1);
+                const __m256d tmp_kernel_op_92 = _mm256_mul_pd(tmp_kernel_op_91,tmp_kernel_op_91);
+                const __m256d tmp_kernel_op_93 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),tmp_kernel_op_6);
+                const __m256d tmp_kernel_op_94 = _mm256_mul_pd(tmp_kernel_op_93,tmp_kernel_op_93);
+                const __m256d tmp_kernel_op_95 = _mm256_add_pd(tmp_kernel_op_92,tmp_kernel_op_94);
+                const __m256d tmp_kernel_op_96 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_95)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17));
+                const __m256d tmp_kernel_op_97 = _mm256_mul_pd(tmp_kernel_op_91,tmp_kernel_op_96);
+                const __m256d tmp_kernel_op_98 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_93),_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_99 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_95),_mm256_mul_pd(tmp_kernel_op_95,tmp_kernel_op_95)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_100 = _mm256_mul_pd(tmp_kernel_op_99,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_98,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_101 = _mm256_mul_pd(tmp_kernel_op_93,tmp_kernel_op_96);
+                const __m256d tmp_kernel_op_102 = _mm256_mul_pd(tmp_kernel_op_99,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_98,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_103 = _mm256_mul_pd(tmp_kernel_op_91,tmp_kernel_op_93);
+                const __m256d tmp_kernel_op_104 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_101,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_103)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_102,tmp_kernel_op_103),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_97,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_101,_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(tmp_kernel_op_102,tmp_kernel_op_92)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_97,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_94),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))))));
+                const __m256d tmp_kernel_op_105 = _mm256_mul_pd(tmp_kernel_op_104,_mm256_set_pd(0.66666666666666674,0.66666666666666674,0.66666666666666674,0.66666666666666674));
+                const __m256d tmp_kernel_op_106 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_29,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_30,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),p_affine_0_0);
+                const __m256d tmp_kernel_op_107 = _mm256_mul_pd(tmp_kernel_op_106,tmp_kernel_op_106);
+                const __m256d tmp_kernel_op_108 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_33,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_34,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),p_affine_0_1);
+                const __m256d tmp_kernel_op_109 = _mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_108);
+                const __m256d tmp_kernel_op_110 = _mm256_add_pd(tmp_kernel_op_107,tmp_kernel_op_109);
+                const __m256d tmp_kernel_op_111 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_110)),_mm256_set_pd(tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40));
+                const __m256d tmp_kernel_op_112 = _mm256_mul_pd(tmp_kernel_op_106,tmp_kernel_op_111);
+                const __m256d tmp_kernel_op_113 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_110),_mm256_mul_pd(tmp_kernel_op_110,tmp_kernel_op_110));
+                const __m256d tmp_kernel_op_114 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45),tmp_kernel_op_106),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44),tmp_kernel_op_108),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38))),_mm256_set_pd(tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_115 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_113,tmp_kernel_op_114),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_116 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_112,_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(tmp_kernel_op_109,tmp_kernel_op_115));
+                const __m256d tmp_kernel_op_117 = _mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_111);
+                const __m256d tmp_kernel_op_118 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_112,_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_106,tmp_kernel_op_108),tmp_kernel_op_113),tmp_kernel_op_114),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_119 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_118,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_117,_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_106,tmp_kernel_op_108),tmp_kernel_op_115),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(tmp_kernel_op_116,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_117,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_113),tmp_kernel_op_114),_mm256_set_pd(1.0,1.0,1.0,1.0))))));
+                const __m256d tmp_kernel_op_120 = _mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_119);
+                const __m256d tmp_kernel_op_121 = _mm256_mul_pd(tmp_kernel_op_118,tmp_kernel_op_119);
+                const __m256d tmp_kernel_op_122 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_121,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_120,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)));
+                const __m256d tmp_kernel_op_124 = _mm256_mul_pd(tmp_kernel_op_122,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123)));
+                const __m256d tmp_kernel_op_125 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_53,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_52,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                const __m256d tmp_kernel_op_127 = _mm256_mul_pd(tmp_kernel_op_125,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126)));
+                const __m256d tmp_kernel_op_128 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_87,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_86,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                const __m256d tmp_kernel_op_130 = _mm256_mul_pd(tmp_kernel_op_128,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129)));
+                const __m256d tmp_kernel_op_131 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_121,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_120,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                const __m256d tmp_kernel_op_133 = _mm256_mul_pd(tmp_kernel_op_131,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132)));
+                const __m256d tmp_kernel_op_134 = _mm256_mul_pd(tmp_kernel_op_125,_mm256_set_pd(tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55));
+                const __m256d tmp_kernel_op_135 = _mm256_mul_pd(tmp_kernel_op_54,_mm256_set_pd(tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126));
+                const __m256d tmp_kernel_op_136 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_134,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_135,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_137 = _mm256_mul_pd(tmp_kernel_op_128,_mm256_set_pd(tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89));
+                const __m256d tmp_kernel_op_138 = _mm256_mul_pd(tmp_kernel_op_88,_mm256_set_pd(tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129));
+                const __m256d tmp_kernel_op_139 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_137,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_138,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_140 = _mm256_mul_pd(tmp_kernel_op_131,_mm256_set_pd(tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123));
+                const __m256d tmp_kernel_op_141 = _mm256_mul_pd(tmp_kernel_op_122,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132));
+                const __m256d tmp_kernel_op_142 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_140,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_141,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_143 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_125,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55)),_mm256_set_pd(-1.333333333333333,-1.333333333333333,-1.333333333333333,-1.333333333333333))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_135);
+                const __m256d tmp_kernel_op_144 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_128,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_138);
+                const __m256d tmp_kernel_op_145 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_131,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_141);
+                const __m256d tmp_kernel_op_146 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_54,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_134);
+                const __m256d tmp_kernel_op_147 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_88,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129)),_mm256_set_pd(-1.333333333333333,-1.333333333333333,-1.333333333333333,-1.333333333333333))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_137);
+                const __m256d tmp_kernel_op_148 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_122,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_140);
+                const __m256d tmp_kernel_op_150 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_125,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_54,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149)));
+                const __m256d tmp_kernel_op_152 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_128,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_151,tmp_kernel_op_151,tmp_kernel_op_151,tmp_kernel_op_151)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_88,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_151,tmp_kernel_op_151,tmp_kernel_op_151,tmp_kernel_op_151)));
+                const __m256d tmp_kernel_op_154 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_153,tmp_kernel_op_153,tmp_kernel_op_153,tmp_kernel_op_153)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_131,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_153,tmp_kernel_op_153,tmp_kernel_op_153,tmp_kernel_op_153)));
+                const __m256d tmp_kernel_op_155 = _mm256_mul_pd(tmp_kernel_op_26,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666));
+                const __m256d tmp_kernel_op_156 = _mm256_mul_pd(tmp_kernel_op_70,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663));
+                const __m256d tmp_kernel_op_157 = _mm256_mul_pd(tmp_kernel_op_104,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666));
+                const __m256d tmp_kernel_op_158 = _mm256_mul_pd(tmp_kernel_op_26,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663));
+                const __m256d tmp_kernel_op_159 = _mm256_mul_pd(tmp_kernel_op_70,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666));
+                const __m256d tmp_kernel_op_160 = _mm256_mul_pd(tmp_kernel_op_104,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666));
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_3,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_142),_mm256_mul_pd(tmp_kernel_op_136,tmp_kernel_op_27)),_mm256_mul_pd(tmp_kernel_op_139,tmp_kernel_op_71))),_mm256_mul_pd(src_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_145),_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_27)),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_71)))),_mm256_mul_pd(src_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_148),_mm256_mul_pd(tmp_kernel_op_146,tmp_kernel_op_27)),_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_71)))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_154),_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_27)),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_71)))),_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_124),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_27,tmp_kernel_op_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_71,tmp_kernel_op_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_133),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_127,tmp_kernel_op_27),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_130,tmp_kernel_op_71),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_3,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_136,tmp_kernel_op_155),_mm256_mul_pd(tmp_kernel_op_139,tmp_kernel_op_156)),_mm256_mul_pd(tmp_kernel_op_142,tmp_kernel_op_157))),_mm256_mul_pd(src_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_155),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_156)),_mm256_mul_pd(tmp_kernel_op_145,tmp_kernel_op_157)))),_mm256_mul_pd(src_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_146,tmp_kernel_op_155),_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_156)),_mm256_mul_pd(tmp_kernel_op_148,tmp_kernel_op_157)))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_155),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_156)),_mm256_mul_pd(tmp_kernel_op_154,tmp_kernel_op_157)))),_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_124,tmp_kernel_op_157),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_155,tmp_kernel_op_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_156,tmp_kernel_op_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_127,tmp_kernel_op_155),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_130,tmp_kernel_op_156),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_133,tmp_kernel_op_157),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_3,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_136,tmp_kernel_op_158),_mm256_mul_pd(tmp_kernel_op_139,tmp_kernel_op_159)),_mm256_mul_pd(tmp_kernel_op_142,tmp_kernel_op_160))),_mm256_mul_pd(src_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_158),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_159)),_mm256_mul_pd(tmp_kernel_op_145,tmp_kernel_op_160)))),_mm256_mul_pd(src_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_146,tmp_kernel_op_158),_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_159)),_mm256_mul_pd(tmp_kernel_op_148,tmp_kernel_op_160)))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_158),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_159)),_mm256_mul_pd(tmp_kernel_op_154,tmp_kernel_op_160)))),_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_124,tmp_kernel_op_160),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_158,tmp_kernel_op_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_159,tmp_kernel_op_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_127,tmp_kernel_op_158),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_130,tmp_kernel_op_159),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_133,tmp_kernel_op_160),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t tmp_kernel_op_1 = -p_affine_0_0;
+                const real_t tmp_kernel_op_2 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_kernel_op_3 = p_affine_0_0 - p_affine_2_0;
+                const real_t tmp_kernel_op_4 = tmp_kernel_op_1 + tmp_kernel_op_2*0.16666666666666666 + tmp_kernel_op_3*0.66666666666666663;
+                const real_t tmp_kernel_op_5 = (tmp_kernel_op_4*tmp_kernel_op_4);
+                const real_t tmp_kernel_op_6 = -p_affine_0_1;
+                const real_t tmp_kernel_op_7 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_kernel_op_8 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_kernel_op_9 = tmp_kernel_op_6 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+                const real_t tmp_kernel_op_10 = (tmp_kernel_op_9*tmp_kernel_op_9);
+                const real_t tmp_kernel_op_11 = tmp_kernel_op_10 + tmp_kernel_op_5;
+                const real_t tmp_kernel_op_18 = pow(tmp_kernel_op_11, -0.50000000000000000)*tmp_kernel_op_17;
+                const real_t tmp_kernel_op_19 = tmp_kernel_op_18*tmp_kernel_op_4;
+                const real_t tmp_kernel_op_20 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_4) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_9);
+                const real_t tmp_kernel_op_21 = pow(tmp_kernel_op_11, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_22 = tmp_kernel_op_21*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_20);
+                const real_t tmp_kernel_op_23 = tmp_kernel_op_18*tmp_kernel_op_9;
+                const real_t tmp_kernel_op_24 = tmp_kernel_op_21*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_20);
+                const real_t tmp_kernel_op_25 = tmp_kernel_op_4*tmp_kernel_op_9;
+                const real_t tmp_kernel_op_26 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_19 - tmp_kernel_op_10*tmp_kernel_op_22)*(tmp_kernel_op_13*tmp_kernel_op_23 + tmp_kernel_op_24*tmp_kernel_op_5) - (tmp_kernel_op_0*tmp_kernel_op_23 + tmp_kernel_op_22*tmp_kernel_op_25)*(tmp_kernel_op_13*tmp_kernel_op_19 - tmp_kernel_op_24*tmp_kernel_op_25));
+                const real_t tmp_kernel_op_27 = tmp_kernel_op_26*0.16666666666666674;
+                const real_t tmp_kernel_op_29 = -tmp_kernel_op_2;
+                const real_t tmp_kernel_op_30 = -tmp_kernel_op_3;
+                const real_t tmp_kernel_op_31 = p_affine_0_0 + tmp_kernel_op_29*0.16666666666666666 + tmp_kernel_op_30*0.66666666666666663;
+                const real_t tmp_kernel_op_32 = (tmp_kernel_op_31*tmp_kernel_op_31);
+                const real_t tmp_kernel_op_33 = -tmp_kernel_op_7;
+                const real_t tmp_kernel_op_34 = -tmp_kernel_op_8;
+                const real_t tmp_kernel_op_35 = p_affine_0_1 + tmp_kernel_op_33*0.16666666666666666 + tmp_kernel_op_34*0.66666666666666663;
+                const real_t tmp_kernel_op_36 = (tmp_kernel_op_35*tmp_kernel_op_35);
+                const real_t tmp_kernel_op_37 = tmp_kernel_op_32 + tmp_kernel_op_36;
+                const real_t tmp_kernel_op_41 = pow(tmp_kernel_op_37, -0.50000000000000000)*tmp_kernel_op_40;
+                const real_t tmp_kernel_op_42 = tmp_kernel_op_31*tmp_kernel_op_41;
+                const real_t tmp_kernel_op_43 = pow(tmp_kernel_op_37, -1.5000000000000000);
+                const real_t tmp_kernel_op_46 = radRayVertex + tmp_kernel_op_39*(tmp_kernel_op_28*(tmp_kernel_op_31 + tmp_kernel_op_45) - tmp_kernel_op_38*(tmp_kernel_op_35 + tmp_kernel_op_44));
+                const real_t tmp_kernel_op_47 = tmp_kernel_op_43*tmp_kernel_op_46*1.0;
+                const real_t tmp_kernel_op_48 = tmp_kernel_op_28*tmp_kernel_op_42 + tmp_kernel_op_36*tmp_kernel_op_47;
+                const real_t tmp_kernel_op_49 = tmp_kernel_op_35*tmp_kernel_op_41;
+                const real_t tmp_kernel_op_50 = tmp_kernel_op_31*tmp_kernel_op_35*tmp_kernel_op_43*tmp_kernel_op_46*1.0 + tmp_kernel_op_38*tmp_kernel_op_42;
+                const real_t tmp_kernel_op_51 = 1.0 / (tmp_kernel_op_48*(tmp_kernel_op_32*tmp_kernel_op_43*tmp_kernel_op_46*1.0 - tmp_kernel_op_38*tmp_kernel_op_49) + tmp_kernel_op_50*(tmp_kernel_op_28*tmp_kernel_op_49 - tmp_kernel_op_31*tmp_kernel_op_35*tmp_kernel_op_47));
+                const real_t tmp_kernel_op_52 = tmp_kernel_op_48*tmp_kernel_op_51;
+                const real_t tmp_kernel_op_53 = tmp_kernel_op_50*tmp_kernel_op_51;
+                const real_t tmp_kernel_op_54 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_53 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_52;
+                const real_t tmp_kernel_op_56 = tmp_kernel_op_54*(tmp_kernel_op_55 - 1.0);
+                const real_t tmp_kernel_op_57 = tmp_kernel_op_1 + tmp_kernel_op_2*0.66666666666666663 + tmp_kernel_op_3*0.16666666666666666;
+                const real_t tmp_kernel_op_58 = (tmp_kernel_op_57*tmp_kernel_op_57);
+                const real_t tmp_kernel_op_59 = tmp_kernel_op_6 + tmp_kernel_op_7*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+                const real_t tmp_kernel_op_60 = (tmp_kernel_op_59*tmp_kernel_op_59);
+                const real_t tmp_kernel_op_61 = tmp_kernel_op_58 + tmp_kernel_op_60;
+                const real_t tmp_kernel_op_62 = tmp_kernel_op_17*pow(tmp_kernel_op_61, -0.50000000000000000);
+                const real_t tmp_kernel_op_63 = tmp_kernel_op_57*tmp_kernel_op_62;
+                const real_t tmp_kernel_op_64 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_57) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_59);
+                const real_t tmp_kernel_op_65 = pow(tmp_kernel_op_61, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_66 = tmp_kernel_op_65*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_64);
+                const real_t tmp_kernel_op_67 = tmp_kernel_op_59*tmp_kernel_op_62;
+                const real_t tmp_kernel_op_68 = tmp_kernel_op_65*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_64);
+                const real_t tmp_kernel_op_69 = tmp_kernel_op_57*tmp_kernel_op_59;
+                const real_t tmp_kernel_op_70 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_63 - tmp_kernel_op_60*tmp_kernel_op_66)*(tmp_kernel_op_13*tmp_kernel_op_67 + tmp_kernel_op_58*tmp_kernel_op_68) - (tmp_kernel_op_0*tmp_kernel_op_67 + tmp_kernel_op_66*tmp_kernel_op_69)*(tmp_kernel_op_13*tmp_kernel_op_63 - tmp_kernel_op_68*tmp_kernel_op_69));
+                const real_t tmp_kernel_op_71 = tmp_kernel_op_70*0.16666666666666671;
+                const real_t tmp_kernel_op_72 = p_affine_0_0 + tmp_kernel_op_29*0.66666666666666663 + tmp_kernel_op_30*0.16666666666666666;
+                const real_t tmp_kernel_op_73 = (tmp_kernel_op_72*tmp_kernel_op_72);
+                const real_t tmp_kernel_op_74 = p_affine_0_1 + tmp_kernel_op_33*0.66666666666666663 + tmp_kernel_op_34*0.16666666666666666;
+                const real_t tmp_kernel_op_75 = (tmp_kernel_op_74*tmp_kernel_op_74);
+                const real_t tmp_kernel_op_76 = tmp_kernel_op_73 + tmp_kernel_op_75;
+                const real_t tmp_kernel_op_77 = tmp_kernel_op_40*pow(tmp_kernel_op_76, -0.50000000000000000);
+                const real_t tmp_kernel_op_78 = tmp_kernel_op_72*tmp_kernel_op_77;
+                const real_t tmp_kernel_op_79 = pow(tmp_kernel_op_76, -1.5000000000000000);
+                const real_t tmp_kernel_op_80 = radRayVertex + tmp_kernel_op_39*(tmp_kernel_op_28*(tmp_kernel_op_45 + tmp_kernel_op_72) - tmp_kernel_op_38*(tmp_kernel_op_44 + tmp_kernel_op_74));
+                const real_t tmp_kernel_op_81 = tmp_kernel_op_79*tmp_kernel_op_80*1.0;
+                const real_t tmp_kernel_op_82 = tmp_kernel_op_28*tmp_kernel_op_78 + tmp_kernel_op_75*tmp_kernel_op_81;
+                const real_t tmp_kernel_op_83 = tmp_kernel_op_74*tmp_kernel_op_77;
+                const real_t tmp_kernel_op_84 = tmp_kernel_op_38*tmp_kernel_op_78 + tmp_kernel_op_72*tmp_kernel_op_74*tmp_kernel_op_79*tmp_kernel_op_80*1.0;
+                const real_t tmp_kernel_op_85 = 1.0 / (tmp_kernel_op_82*(-tmp_kernel_op_38*tmp_kernel_op_83 + tmp_kernel_op_73*tmp_kernel_op_79*tmp_kernel_op_80*1.0) + tmp_kernel_op_84*(tmp_kernel_op_28*tmp_kernel_op_83 - tmp_kernel_op_72*tmp_kernel_op_74*tmp_kernel_op_81));
+                const real_t tmp_kernel_op_86 = tmp_kernel_op_82*tmp_kernel_op_85;
+                const real_t tmp_kernel_op_87 = tmp_kernel_op_84*tmp_kernel_op_85;
+                const real_t tmp_kernel_op_88 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_87 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_86;
+                const real_t tmp_kernel_op_90 = tmp_kernel_op_88*(tmp_kernel_op_89 - 1.0);
+                const real_t tmp_kernel_op_91 = tmp_kernel_op_1 + tmp_kernel_op_2*0.16666666666666666 + tmp_kernel_op_3*0.16666666666666666;
+                const real_t tmp_kernel_op_92 = (tmp_kernel_op_91*tmp_kernel_op_91);
+                const real_t tmp_kernel_op_93 = tmp_kernel_op_6 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+                const real_t tmp_kernel_op_94 = (tmp_kernel_op_93*tmp_kernel_op_93);
+                const real_t tmp_kernel_op_95 = tmp_kernel_op_92 + tmp_kernel_op_94;
+                const real_t tmp_kernel_op_96 = tmp_kernel_op_17*pow(tmp_kernel_op_95, -0.50000000000000000);
+                const real_t tmp_kernel_op_97 = tmp_kernel_op_91*tmp_kernel_op_96;
+                const real_t tmp_kernel_op_98 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_91) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_93);
+                const real_t tmp_kernel_op_99 = pow(tmp_kernel_op_95, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_100 = tmp_kernel_op_99*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_98);
+                const real_t tmp_kernel_op_101 = tmp_kernel_op_93*tmp_kernel_op_96;
+                const real_t tmp_kernel_op_102 = tmp_kernel_op_99*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_98);
+                const real_t tmp_kernel_op_103 = tmp_kernel_op_91*tmp_kernel_op_93;
+                const real_t tmp_kernel_op_104 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_101 + tmp_kernel_op_100*tmp_kernel_op_103)*(tmp_kernel_op_102*tmp_kernel_op_103 - tmp_kernel_op_13*tmp_kernel_op_97) + (tmp_kernel_op_0*tmp_kernel_op_97 - tmp_kernel_op_100*tmp_kernel_op_94)*(tmp_kernel_op_101*tmp_kernel_op_13 + tmp_kernel_op_102*tmp_kernel_op_92));
+                const real_t tmp_kernel_op_105 = tmp_kernel_op_104*0.66666666666666674;
+                const real_t tmp_kernel_op_106 = p_affine_0_0 + tmp_kernel_op_29*0.16666666666666666 + tmp_kernel_op_30*0.16666666666666666;
+                const real_t tmp_kernel_op_107 = (tmp_kernel_op_106*tmp_kernel_op_106);
+                const real_t tmp_kernel_op_108 = p_affine_0_1 + tmp_kernel_op_33*0.16666666666666666 + tmp_kernel_op_34*0.16666666666666666;
+                const real_t tmp_kernel_op_109 = (tmp_kernel_op_108*tmp_kernel_op_108);
+                const real_t tmp_kernel_op_110 = tmp_kernel_op_107 + tmp_kernel_op_109;
+                const real_t tmp_kernel_op_111 = pow(tmp_kernel_op_110, -0.50000000000000000)*tmp_kernel_op_40;
+                const real_t tmp_kernel_op_112 = tmp_kernel_op_106*tmp_kernel_op_111;
+                const real_t tmp_kernel_op_113 = pow(tmp_kernel_op_110, -1.5000000000000000);
+                const real_t tmp_kernel_op_114 = radRayVertex + tmp_kernel_op_39*(tmp_kernel_op_28*(tmp_kernel_op_106 + tmp_kernel_op_45) - tmp_kernel_op_38*(tmp_kernel_op_108 + tmp_kernel_op_44));
+                const real_t tmp_kernel_op_115 = tmp_kernel_op_113*tmp_kernel_op_114*1.0;
+                const real_t tmp_kernel_op_116 = tmp_kernel_op_109*tmp_kernel_op_115 + tmp_kernel_op_112*tmp_kernel_op_28;
+                const real_t tmp_kernel_op_117 = tmp_kernel_op_108*tmp_kernel_op_111;
+                const real_t tmp_kernel_op_118 = tmp_kernel_op_106*tmp_kernel_op_108*tmp_kernel_op_113*tmp_kernel_op_114*1.0 + tmp_kernel_op_112*tmp_kernel_op_38;
+                const real_t tmp_kernel_op_119 = 1.0 / (tmp_kernel_op_116*(tmp_kernel_op_107*tmp_kernel_op_113*tmp_kernel_op_114*1.0 - tmp_kernel_op_117*tmp_kernel_op_38) + tmp_kernel_op_118*(-tmp_kernel_op_106*tmp_kernel_op_108*tmp_kernel_op_115 + tmp_kernel_op_117*tmp_kernel_op_28));
+                const real_t tmp_kernel_op_120 = tmp_kernel_op_116*tmp_kernel_op_119;
+                const real_t tmp_kernel_op_121 = tmp_kernel_op_118*tmp_kernel_op_119;
+                const real_t tmp_kernel_op_122 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_121 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_120;
+                const real_t tmp_kernel_op_124 = tmp_kernel_op_122*(tmp_kernel_op_123 - 1.0);
+                const real_t tmp_kernel_op_125 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_53 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_52;
+                const real_t tmp_kernel_op_127 = tmp_kernel_op_125*(tmp_kernel_op_126 - 1.0);
+                const real_t tmp_kernel_op_128 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_87 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_86;
+                const real_t tmp_kernel_op_130 = tmp_kernel_op_128*(tmp_kernel_op_129 - 1.0);
+                const real_t tmp_kernel_op_131 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_121 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_120;
+                const real_t tmp_kernel_op_133 = tmp_kernel_op_131*(tmp_kernel_op_132 - 1.0);
+                const real_t tmp_kernel_op_134 = tmp_kernel_op_125*tmp_kernel_op_55;
+                const real_t tmp_kernel_op_135 = tmp_kernel_op_126*tmp_kernel_op_54;
+                const real_t tmp_kernel_op_136 = -tmp_kernel_op_134 - tmp_kernel_op_135;
+                const real_t tmp_kernel_op_137 = tmp_kernel_op_128*tmp_kernel_op_89;
+                const real_t tmp_kernel_op_138 = tmp_kernel_op_129*tmp_kernel_op_88;
+                const real_t tmp_kernel_op_139 = -tmp_kernel_op_137 - tmp_kernel_op_138;
+                const real_t tmp_kernel_op_140 = tmp_kernel_op_123*tmp_kernel_op_131;
+                const real_t tmp_kernel_op_141 = tmp_kernel_op_122*tmp_kernel_op_132;
+                const real_t tmp_kernel_op_142 = -tmp_kernel_op_140 - tmp_kernel_op_141;
+                const real_t tmp_kernel_op_143 = -tmp_kernel_op_125*(-tmp_kernel_op_55 - 1.333333333333333) + tmp_kernel_op_135;
+                const real_t tmp_kernel_op_144 = -tmp_kernel_op_128*(-tmp_kernel_op_89 + 2.666666666666667) + tmp_kernel_op_138;
+                const real_t tmp_kernel_op_145 = -tmp_kernel_op_131*(-tmp_kernel_op_123 + 2.666666666666667) + tmp_kernel_op_141;
+                const real_t tmp_kernel_op_146 = tmp_kernel_op_134 - tmp_kernel_op_54*(-tmp_kernel_op_126 + 2.666666666666667);
+                const real_t tmp_kernel_op_147 = tmp_kernel_op_137 - tmp_kernel_op_88*(-tmp_kernel_op_129 - 1.333333333333333);
+                const real_t tmp_kernel_op_148 = -tmp_kernel_op_122*(-tmp_kernel_op_132 + 2.666666666666667) + tmp_kernel_op_140;
+                const real_t tmp_kernel_op_150 = -tmp_kernel_op_125*tmp_kernel_op_149 - tmp_kernel_op_149*tmp_kernel_op_54;
+                const real_t tmp_kernel_op_152 = -tmp_kernel_op_128*tmp_kernel_op_151 - tmp_kernel_op_151*tmp_kernel_op_88;
+                const real_t tmp_kernel_op_154 = -tmp_kernel_op_122*tmp_kernel_op_153 - tmp_kernel_op_131*tmp_kernel_op_153;
+                const real_t tmp_kernel_op_155 = tmp_kernel_op_26*0.16666666666666666;
+                const real_t tmp_kernel_op_156 = tmp_kernel_op_70*0.66666666666666663;
+                const real_t tmp_kernel_op_157 = tmp_kernel_op_104*0.16666666666666666;
+                const real_t tmp_kernel_op_158 = tmp_kernel_op_26*0.66666666666666663;
+                const real_t tmp_kernel_op_159 = tmp_kernel_op_70*0.16666666666666666;
+                const real_t tmp_kernel_op_160 = tmp_kernel_op_104*0.16666666666666666;
+                const real_t elMatVec_0 = src_dof_0*(tmp_kernel_op_105*tmp_kernel_op_154 + tmp_kernel_op_150*tmp_kernel_op_27 + tmp_kernel_op_152*tmp_kernel_op_71) + src_dof_1*(-tmp_kernel_op_105*tmp_kernel_op_124 - tmp_kernel_op_27*tmp_kernel_op_56 - tmp_kernel_op_71*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_105*tmp_kernel_op_133 - tmp_kernel_op_127*tmp_kernel_op_27 - tmp_kernel_op_130*tmp_kernel_op_71) + src_dof_3*(tmp_kernel_op_105*tmp_kernel_op_142 + tmp_kernel_op_136*tmp_kernel_op_27 + tmp_kernel_op_139*tmp_kernel_op_71) + src_dof_4*(tmp_kernel_op_105*tmp_kernel_op_145 + tmp_kernel_op_143*tmp_kernel_op_27 + tmp_kernel_op_144*tmp_kernel_op_71) + src_dof_5*(tmp_kernel_op_105*tmp_kernel_op_148 + tmp_kernel_op_146*tmp_kernel_op_27 + tmp_kernel_op_147*tmp_kernel_op_71);
+                const real_t elMatVec_1 = src_dof_0*(tmp_kernel_op_150*tmp_kernel_op_155 + tmp_kernel_op_152*tmp_kernel_op_156 + tmp_kernel_op_154*tmp_kernel_op_157) + src_dof_1*(-tmp_kernel_op_124*tmp_kernel_op_157 - tmp_kernel_op_155*tmp_kernel_op_56 - tmp_kernel_op_156*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_127*tmp_kernel_op_155 - tmp_kernel_op_130*tmp_kernel_op_156 - tmp_kernel_op_133*tmp_kernel_op_157) + src_dof_3*(tmp_kernel_op_136*tmp_kernel_op_155 + tmp_kernel_op_139*tmp_kernel_op_156 + tmp_kernel_op_142*tmp_kernel_op_157) + src_dof_4*(tmp_kernel_op_143*tmp_kernel_op_155 + tmp_kernel_op_144*tmp_kernel_op_156 + tmp_kernel_op_145*tmp_kernel_op_157) + src_dof_5*(tmp_kernel_op_146*tmp_kernel_op_155 + tmp_kernel_op_147*tmp_kernel_op_156 + tmp_kernel_op_148*tmp_kernel_op_157);
+                const real_t elMatVec_2 = src_dof_0*(tmp_kernel_op_150*tmp_kernel_op_158 + tmp_kernel_op_152*tmp_kernel_op_159 + tmp_kernel_op_154*tmp_kernel_op_160) + src_dof_1*(-tmp_kernel_op_124*tmp_kernel_op_160 - tmp_kernel_op_158*tmp_kernel_op_56 - tmp_kernel_op_159*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_127*tmp_kernel_op_158 - tmp_kernel_op_130*tmp_kernel_op_159 - tmp_kernel_op_133*tmp_kernel_op_160) + src_dof_3*(tmp_kernel_op_136*tmp_kernel_op_158 + tmp_kernel_op_139*tmp_kernel_op_159 + tmp_kernel_op_142*tmp_kernel_op_160) + src_dof_4*(tmp_kernel_op_143*tmp_kernel_op_158 + tmp_kernel_op_144*tmp_kernel_op_159 + tmp_kernel_op_145*tmp_kernel_op_160) + src_dof_5*(tmp_kernel_op_146*tmp_kernel_op_158 + tmp_kernel_op_147*tmp_kernel_op_159 + tmp_kernel_op_148*tmp_kernel_op_160);
+                _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d tmp_kernel_op_1 = _mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_1);
+                const __m256d tmp_kernel_op_5 = _mm256_mul_pd(tmp_kernel_op_4,tmp_kernel_op_4);
+                const __m256d tmp_kernel_op_6 = _mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_7 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_8 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_9 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_6);
+                const __m256d tmp_kernel_op_10 = _mm256_mul_pd(tmp_kernel_op_9,tmp_kernel_op_9);
+                const __m256d tmp_kernel_op_11 = _mm256_add_pd(tmp_kernel_op_10,tmp_kernel_op_5);
+                const __m256d tmp_kernel_op_18 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_11)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17));
+                const __m256d tmp_kernel_op_19 = _mm256_mul_pd(tmp_kernel_op_18,tmp_kernel_op_4);
+                const __m256d tmp_kernel_op_20 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_9),_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_4),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_21 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_11),_mm256_mul_pd(tmp_kernel_op_11,tmp_kernel_op_11)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_22 = _mm256_mul_pd(tmp_kernel_op_21,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_20,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_23 = _mm256_mul_pd(tmp_kernel_op_18,tmp_kernel_op_9);
+                const __m256d tmp_kernel_op_24 = _mm256_mul_pd(tmp_kernel_op_21,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_20,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_25 = _mm256_mul_pd(tmp_kernel_op_4,tmp_kernel_op_9);
+                const __m256d tmp_kernel_op_26 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_23,_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(tmp_kernel_op_24,tmp_kernel_op_5)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_19,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_10,tmp_kernel_op_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_19,_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_24,tmp_kernel_op_25),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_23,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_22,tmp_kernel_op_25))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_27 = _mm256_mul_pd(tmp_kernel_op_26,_mm256_set_pd(0.16666666666666674,0.16666666666666674,0.16666666666666674,0.16666666666666674));
+                const __m256d tmp_kernel_op_29 = _mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_30 = _mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_31 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_29,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_30,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_0);
+                const __m256d tmp_kernel_op_32 = _mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_31);
+                const __m256d tmp_kernel_op_33 = _mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_34 = _mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_35 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_33,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_34,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_1);
+                const __m256d tmp_kernel_op_36 = _mm256_mul_pd(tmp_kernel_op_35,tmp_kernel_op_35);
+                const __m256d tmp_kernel_op_37 = _mm256_add_pd(tmp_kernel_op_32,tmp_kernel_op_36);
+                const __m256d tmp_kernel_op_41 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_37)),_mm256_set_pd(tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40));
+                const __m256d tmp_kernel_op_42 = _mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_41);
+                const __m256d tmp_kernel_op_43 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_37),_mm256_mul_pd(tmp_kernel_op_37,tmp_kernel_op_37));
+                const __m256d tmp_kernel_op_46 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45),tmp_kernel_op_31),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44),tmp_kernel_op_35),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38))),_mm256_set_pd(tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_47 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_43,tmp_kernel_op_46),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_48 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_42,_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(tmp_kernel_op_36,tmp_kernel_op_47));
+                const __m256d tmp_kernel_op_49 = _mm256_mul_pd(tmp_kernel_op_35,tmp_kernel_op_41);
+                const __m256d tmp_kernel_op_50 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_42,_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_35),tmp_kernel_op_43),tmp_kernel_op_46),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_51 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_50,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_49,_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_35),tmp_kernel_op_47),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(tmp_kernel_op_48,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_49,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_32,tmp_kernel_op_43),tmp_kernel_op_46),_mm256_set_pd(1.0,1.0,1.0,1.0))))));
+                const __m256d tmp_kernel_op_52 = _mm256_mul_pd(tmp_kernel_op_48,tmp_kernel_op_51);
+                const __m256d tmp_kernel_op_53 = _mm256_mul_pd(tmp_kernel_op_50,tmp_kernel_op_51);
+                const __m256d tmp_kernel_op_54 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_53,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_52,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)));
+                const __m256d tmp_kernel_op_56 = _mm256_mul_pd(tmp_kernel_op_54,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55)));
+                const __m256d tmp_kernel_op_57 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_1);
+                const __m256d tmp_kernel_op_58 = _mm256_mul_pd(tmp_kernel_op_57,tmp_kernel_op_57);
+                const __m256d tmp_kernel_op_59 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_6);
+                const __m256d tmp_kernel_op_60 = _mm256_mul_pd(tmp_kernel_op_59,tmp_kernel_op_59);
+                const __m256d tmp_kernel_op_61 = _mm256_add_pd(tmp_kernel_op_58,tmp_kernel_op_60);
+                const __m256d tmp_kernel_op_62 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_61)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17));
+                const __m256d tmp_kernel_op_63 = _mm256_mul_pd(tmp_kernel_op_57,tmp_kernel_op_62);
+                const __m256d tmp_kernel_op_64 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_59),_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_57),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_65 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_61),_mm256_mul_pd(tmp_kernel_op_61,tmp_kernel_op_61)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_66 = _mm256_mul_pd(tmp_kernel_op_65,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_64,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_67 = _mm256_mul_pd(tmp_kernel_op_59,tmp_kernel_op_62);
+                const __m256d tmp_kernel_op_68 = _mm256_mul_pd(tmp_kernel_op_65,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_64,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_69 = _mm256_mul_pd(tmp_kernel_op_57,tmp_kernel_op_59);
+                const __m256d tmp_kernel_op_70 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_67,_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(tmp_kernel_op_58,tmp_kernel_op_68)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_63,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_60,tmp_kernel_op_66),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_63,_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_68,tmp_kernel_op_69),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_67,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_66,tmp_kernel_op_69))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_71 = _mm256_mul_pd(tmp_kernel_op_70,_mm256_set_pd(0.16666666666666671,0.16666666666666671,0.16666666666666671,0.16666666666666671));
+                const __m256d tmp_kernel_op_72 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_30,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_29,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_0);
+                const __m256d tmp_kernel_op_73 = _mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_72);
+                const __m256d tmp_kernel_op_74 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_34,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_33,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_1);
+                const __m256d tmp_kernel_op_75 = _mm256_mul_pd(tmp_kernel_op_74,tmp_kernel_op_74);
+                const __m256d tmp_kernel_op_76 = _mm256_add_pd(tmp_kernel_op_73,tmp_kernel_op_75);
+                const __m256d tmp_kernel_op_77 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_76)),_mm256_set_pd(tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40));
+                const __m256d tmp_kernel_op_78 = _mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_77);
+                const __m256d tmp_kernel_op_79 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_76),_mm256_mul_pd(tmp_kernel_op_76,tmp_kernel_op_76));
+                const __m256d tmp_kernel_op_80 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45),tmp_kernel_op_72),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44),tmp_kernel_op_74),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38))),_mm256_set_pd(tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_81 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_79,tmp_kernel_op_80),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_82 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_78,_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(tmp_kernel_op_75,tmp_kernel_op_81));
+                const __m256d tmp_kernel_op_83 = _mm256_mul_pd(tmp_kernel_op_74,tmp_kernel_op_77);
+                const __m256d tmp_kernel_op_84 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_78,_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_74),tmp_kernel_op_79),tmp_kernel_op_80),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_85 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_84,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_83,_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_74),tmp_kernel_op_81),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(tmp_kernel_op_82,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_83,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_73,tmp_kernel_op_79),tmp_kernel_op_80),_mm256_set_pd(1.0,1.0,1.0,1.0))))));
+                const __m256d tmp_kernel_op_86 = _mm256_mul_pd(tmp_kernel_op_82,tmp_kernel_op_85);
+                const __m256d tmp_kernel_op_87 = _mm256_mul_pd(tmp_kernel_op_84,tmp_kernel_op_85);
+                const __m256d tmp_kernel_op_88 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_87,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_86,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)));
+                const __m256d tmp_kernel_op_90 = _mm256_mul_pd(tmp_kernel_op_88,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89)));
+                const __m256d tmp_kernel_op_91 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_2,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_3,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),tmp_kernel_op_1);
+                const __m256d tmp_kernel_op_92 = _mm256_mul_pd(tmp_kernel_op_91,tmp_kernel_op_91);
+                const __m256d tmp_kernel_op_93 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),tmp_kernel_op_6);
+                const __m256d tmp_kernel_op_94 = _mm256_mul_pd(tmp_kernel_op_93,tmp_kernel_op_93);
+                const __m256d tmp_kernel_op_95 = _mm256_add_pd(tmp_kernel_op_92,tmp_kernel_op_94);
+                const __m256d tmp_kernel_op_96 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_95)),_mm256_set_pd(tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17,tmp_kernel_op_17));
+                const __m256d tmp_kernel_op_97 = _mm256_mul_pd(tmp_kernel_op_91,tmp_kernel_op_96);
+                const __m256d tmp_kernel_op_98 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_93),_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_99 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_95),_mm256_mul_pd(tmp_kernel_op_95,tmp_kernel_op_95)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_100 = _mm256_mul_pd(tmp_kernel_op_99,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_98,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_101 = _mm256_mul_pd(tmp_kernel_op_93,tmp_kernel_op_96);
+                const __m256d tmp_kernel_op_102 = _mm256_mul_pd(tmp_kernel_op_99,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_98,_mm256_set_pd(tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16,tmp_kernel_op_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_103 = _mm256_mul_pd(tmp_kernel_op_91,tmp_kernel_op_93);
+                const __m256d tmp_kernel_op_104 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_101,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_103)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_102,tmp_kernel_op_103),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_97,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_101,_mm256_set_pd(tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13,tmp_kernel_op_13)),_mm256_mul_pd(tmp_kernel_op_102,tmp_kernel_op_92)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_97,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_94),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))))));
+                const __m256d tmp_kernel_op_105 = _mm256_mul_pd(tmp_kernel_op_104,_mm256_set_pd(0.66666666666666674,0.66666666666666674,0.66666666666666674,0.66666666666666674));
+                const __m256d tmp_kernel_op_106 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_29,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_30,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),p_affine_0_0);
+                const __m256d tmp_kernel_op_107 = _mm256_mul_pd(tmp_kernel_op_106,tmp_kernel_op_106);
+                const __m256d tmp_kernel_op_108 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_33,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_34,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),p_affine_0_1);
+                const __m256d tmp_kernel_op_109 = _mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_108);
+                const __m256d tmp_kernel_op_110 = _mm256_add_pd(tmp_kernel_op_107,tmp_kernel_op_109);
+                const __m256d tmp_kernel_op_111 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_110)),_mm256_set_pd(tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40,tmp_kernel_op_40));
+                const __m256d tmp_kernel_op_112 = _mm256_mul_pd(tmp_kernel_op_106,tmp_kernel_op_111);
+                const __m256d tmp_kernel_op_113 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_110),_mm256_mul_pd(tmp_kernel_op_110,tmp_kernel_op_110));
+                const __m256d tmp_kernel_op_114 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45,tmp_kernel_op_45),tmp_kernel_op_106),_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44,tmp_kernel_op_44),tmp_kernel_op_108),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38))),_mm256_set_pd(tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39,tmp_kernel_op_39)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_115 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_113,tmp_kernel_op_114),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_116 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_112,_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(tmp_kernel_op_109,tmp_kernel_op_115));
+                const __m256d tmp_kernel_op_117 = _mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_111);
+                const __m256d tmp_kernel_op_118 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_112,_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_106,tmp_kernel_op_108),tmp_kernel_op_113),tmp_kernel_op_114),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_119 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_118,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_117,_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_106,tmp_kernel_op_108),tmp_kernel_op_115),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(tmp_kernel_op_116,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_117,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38,tmp_kernel_op_38)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_113),tmp_kernel_op_114),_mm256_set_pd(1.0,1.0,1.0,1.0))))));
+                const __m256d tmp_kernel_op_120 = _mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_119);
+                const __m256d tmp_kernel_op_121 = _mm256_mul_pd(tmp_kernel_op_118,tmp_kernel_op_119);
+                const __m256d tmp_kernel_op_122 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_121,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_120,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)));
+                const __m256d tmp_kernel_op_124 = _mm256_mul_pd(tmp_kernel_op_122,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123)));
+                const __m256d tmp_kernel_op_125 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_53,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_52,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                const __m256d tmp_kernel_op_127 = _mm256_mul_pd(tmp_kernel_op_125,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126)));
+                const __m256d tmp_kernel_op_128 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_87,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_86,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                const __m256d tmp_kernel_op_130 = _mm256_mul_pd(tmp_kernel_op_128,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129)));
+                const __m256d tmp_kernel_op_131 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_121,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_120,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                const __m256d tmp_kernel_op_133 = _mm256_mul_pd(tmp_kernel_op_131,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132)));
+                const __m256d tmp_kernel_op_134 = _mm256_mul_pd(tmp_kernel_op_125,_mm256_set_pd(tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55));
+                const __m256d tmp_kernel_op_135 = _mm256_mul_pd(tmp_kernel_op_54,_mm256_set_pd(tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126));
+                const __m256d tmp_kernel_op_136 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_134,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_135,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_137 = _mm256_mul_pd(tmp_kernel_op_128,_mm256_set_pd(tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89));
+                const __m256d tmp_kernel_op_138 = _mm256_mul_pd(tmp_kernel_op_88,_mm256_set_pd(tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129));
+                const __m256d tmp_kernel_op_139 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_137,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_138,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_140 = _mm256_mul_pd(tmp_kernel_op_131,_mm256_set_pd(tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123));
+                const __m256d tmp_kernel_op_141 = _mm256_mul_pd(tmp_kernel_op_122,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132));
+                const __m256d tmp_kernel_op_142 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_140,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_141,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_143 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_125,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55,tmp_kernel_op_55)),_mm256_set_pd(-1.333333333333333,-1.333333333333333,-1.333333333333333,-1.333333333333333))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_135);
+                const __m256d tmp_kernel_op_144 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_128,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89,tmp_kernel_op_89)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_138);
+                const __m256d tmp_kernel_op_145 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_131,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123,tmp_kernel_op_123)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_141);
+                const __m256d tmp_kernel_op_146 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_54,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126,tmp_kernel_op_126)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_134);
+                const __m256d tmp_kernel_op_147 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_88,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129,tmp_kernel_op_129)),_mm256_set_pd(-1.333333333333333,-1.333333333333333,-1.333333333333333,-1.333333333333333))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_137);
+                const __m256d tmp_kernel_op_148 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_122,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_140);
+                const __m256d tmp_kernel_op_150 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_125,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_54,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149,tmp_kernel_op_149)));
+                const __m256d tmp_kernel_op_152 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_128,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_151,tmp_kernel_op_151,tmp_kernel_op_151,tmp_kernel_op_151)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_88,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_151,tmp_kernel_op_151,tmp_kernel_op_151,tmp_kernel_op_151)));
+                const __m256d tmp_kernel_op_154 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_153,tmp_kernel_op_153,tmp_kernel_op_153,tmp_kernel_op_153)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_131,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_153,tmp_kernel_op_153,tmp_kernel_op_153,tmp_kernel_op_153)));
+                const __m256d tmp_kernel_op_155 = _mm256_mul_pd(tmp_kernel_op_26,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666));
+                const __m256d tmp_kernel_op_156 = _mm256_mul_pd(tmp_kernel_op_70,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663));
+                const __m256d tmp_kernel_op_157 = _mm256_mul_pd(tmp_kernel_op_104,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666));
+                const __m256d tmp_kernel_op_158 = _mm256_mul_pd(tmp_kernel_op_26,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663));
+                const __m256d tmp_kernel_op_159 = _mm256_mul_pd(tmp_kernel_op_70,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666));
+                const __m256d tmp_kernel_op_160 = _mm256_mul_pd(tmp_kernel_op_104,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666));
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_3,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_142),_mm256_mul_pd(tmp_kernel_op_136,tmp_kernel_op_27)),_mm256_mul_pd(tmp_kernel_op_139,tmp_kernel_op_71))),_mm256_mul_pd(src_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_145),_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_27)),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_71)))),_mm256_mul_pd(src_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_148),_mm256_mul_pd(tmp_kernel_op_146,tmp_kernel_op_27)),_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_71)))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_154),_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_27)),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_71)))),_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_124),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_27,tmp_kernel_op_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_71,tmp_kernel_op_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_133),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_127,tmp_kernel_op_27),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_130,tmp_kernel_op_71),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_3,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_136,tmp_kernel_op_155),_mm256_mul_pd(tmp_kernel_op_139,tmp_kernel_op_156)),_mm256_mul_pd(tmp_kernel_op_142,tmp_kernel_op_157))),_mm256_mul_pd(src_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_155),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_156)),_mm256_mul_pd(tmp_kernel_op_145,tmp_kernel_op_157)))),_mm256_mul_pd(src_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_146,tmp_kernel_op_155),_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_156)),_mm256_mul_pd(tmp_kernel_op_148,tmp_kernel_op_157)))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_155),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_156)),_mm256_mul_pd(tmp_kernel_op_154,tmp_kernel_op_157)))),_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_124,tmp_kernel_op_157),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_155,tmp_kernel_op_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_156,tmp_kernel_op_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_127,tmp_kernel_op_155),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_130,tmp_kernel_op_156),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_133,tmp_kernel_op_157),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_3,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_136,tmp_kernel_op_158),_mm256_mul_pd(tmp_kernel_op_139,tmp_kernel_op_159)),_mm256_mul_pd(tmp_kernel_op_142,tmp_kernel_op_160))),_mm256_mul_pd(src_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_158),_mm256_mul_pd(tmp_kernel_op_144,tmp_kernel_op_159)),_mm256_mul_pd(tmp_kernel_op_145,tmp_kernel_op_160)))),_mm256_mul_pd(src_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_146,tmp_kernel_op_158),_mm256_mul_pd(tmp_kernel_op_147,tmp_kernel_op_159)),_mm256_mul_pd(tmp_kernel_op_148,tmp_kernel_op_160)))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_150,tmp_kernel_op_158),_mm256_mul_pd(tmp_kernel_op_152,tmp_kernel_op_159)),_mm256_mul_pd(tmp_kernel_op_154,tmp_kernel_op_160)))),_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_124,tmp_kernel_op_160),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_158,tmp_kernel_op_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_159,tmp_kernel_op_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_127,tmp_kernel_op_158),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_130,tmp_kernel_op_159),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_133,tmp_kernel_op_160),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t tmp_kernel_op_1 = -p_affine_0_0;
+                const real_t tmp_kernel_op_2 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_kernel_op_3 = p_affine_0_0 - p_affine_2_0;
+                const real_t tmp_kernel_op_4 = tmp_kernel_op_1 + tmp_kernel_op_2*0.16666666666666666 + tmp_kernel_op_3*0.66666666666666663;
+                const real_t tmp_kernel_op_5 = (tmp_kernel_op_4*tmp_kernel_op_4);
+                const real_t tmp_kernel_op_6 = -p_affine_0_1;
+                const real_t tmp_kernel_op_7 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_kernel_op_8 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_kernel_op_9 = tmp_kernel_op_6 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+                const real_t tmp_kernel_op_10 = (tmp_kernel_op_9*tmp_kernel_op_9);
+                const real_t tmp_kernel_op_11 = tmp_kernel_op_10 + tmp_kernel_op_5;
+                const real_t tmp_kernel_op_18 = pow(tmp_kernel_op_11, -0.50000000000000000)*tmp_kernel_op_17;
+                const real_t tmp_kernel_op_19 = tmp_kernel_op_18*tmp_kernel_op_4;
+                const real_t tmp_kernel_op_20 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_4) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_9);
+                const real_t tmp_kernel_op_21 = pow(tmp_kernel_op_11, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_22 = tmp_kernel_op_21*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_20);
+                const real_t tmp_kernel_op_23 = tmp_kernel_op_18*tmp_kernel_op_9;
+                const real_t tmp_kernel_op_24 = tmp_kernel_op_21*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_20);
+                const real_t tmp_kernel_op_25 = tmp_kernel_op_4*tmp_kernel_op_9;
+                const real_t tmp_kernel_op_26 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_19 - tmp_kernel_op_10*tmp_kernel_op_22)*(tmp_kernel_op_13*tmp_kernel_op_23 + tmp_kernel_op_24*tmp_kernel_op_5) - (tmp_kernel_op_0*tmp_kernel_op_23 + tmp_kernel_op_22*tmp_kernel_op_25)*(tmp_kernel_op_13*tmp_kernel_op_19 - tmp_kernel_op_24*tmp_kernel_op_25));
+                const real_t tmp_kernel_op_27 = tmp_kernel_op_26*0.16666666666666674;
+                const real_t tmp_kernel_op_29 = -tmp_kernel_op_2;
+                const real_t tmp_kernel_op_30 = -tmp_kernel_op_3;
+                const real_t tmp_kernel_op_31 = p_affine_0_0 + tmp_kernel_op_29*0.16666666666666666 + tmp_kernel_op_30*0.66666666666666663;
+                const real_t tmp_kernel_op_32 = (tmp_kernel_op_31*tmp_kernel_op_31);
+                const real_t tmp_kernel_op_33 = -tmp_kernel_op_7;
+                const real_t tmp_kernel_op_34 = -tmp_kernel_op_8;
+                const real_t tmp_kernel_op_35 = p_affine_0_1 + tmp_kernel_op_33*0.16666666666666666 + tmp_kernel_op_34*0.66666666666666663;
+                const real_t tmp_kernel_op_36 = (tmp_kernel_op_35*tmp_kernel_op_35);
+                const real_t tmp_kernel_op_37 = tmp_kernel_op_32 + tmp_kernel_op_36;
+                const real_t tmp_kernel_op_41 = pow(tmp_kernel_op_37, -0.50000000000000000)*tmp_kernel_op_40;
+                const real_t tmp_kernel_op_42 = tmp_kernel_op_31*tmp_kernel_op_41;
+                const real_t tmp_kernel_op_43 = pow(tmp_kernel_op_37, -1.5000000000000000);
+                const real_t tmp_kernel_op_46 = radRayVertex + tmp_kernel_op_39*(tmp_kernel_op_28*(tmp_kernel_op_31 + tmp_kernel_op_45) - tmp_kernel_op_38*(tmp_kernel_op_35 + tmp_kernel_op_44));
+                const real_t tmp_kernel_op_47 = tmp_kernel_op_43*tmp_kernel_op_46*1.0;
+                const real_t tmp_kernel_op_48 = tmp_kernel_op_28*tmp_kernel_op_42 + tmp_kernel_op_36*tmp_kernel_op_47;
+                const real_t tmp_kernel_op_49 = tmp_kernel_op_35*tmp_kernel_op_41;
+                const real_t tmp_kernel_op_50 = tmp_kernel_op_31*tmp_kernel_op_35*tmp_kernel_op_43*tmp_kernel_op_46*1.0 + tmp_kernel_op_38*tmp_kernel_op_42;
+                const real_t tmp_kernel_op_51 = 1.0 / (tmp_kernel_op_48*(tmp_kernel_op_32*tmp_kernel_op_43*tmp_kernel_op_46*1.0 - tmp_kernel_op_38*tmp_kernel_op_49) + tmp_kernel_op_50*(tmp_kernel_op_28*tmp_kernel_op_49 - tmp_kernel_op_31*tmp_kernel_op_35*tmp_kernel_op_47));
+                const real_t tmp_kernel_op_52 = tmp_kernel_op_48*tmp_kernel_op_51;
+                const real_t tmp_kernel_op_53 = tmp_kernel_op_50*tmp_kernel_op_51;
+                const real_t tmp_kernel_op_54 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_53 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_52;
+                const real_t tmp_kernel_op_56 = tmp_kernel_op_54*(tmp_kernel_op_55 - 1.0);
+                const real_t tmp_kernel_op_57 = tmp_kernel_op_1 + tmp_kernel_op_2*0.66666666666666663 + tmp_kernel_op_3*0.16666666666666666;
+                const real_t tmp_kernel_op_58 = (tmp_kernel_op_57*tmp_kernel_op_57);
+                const real_t tmp_kernel_op_59 = tmp_kernel_op_6 + tmp_kernel_op_7*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+                const real_t tmp_kernel_op_60 = (tmp_kernel_op_59*tmp_kernel_op_59);
+                const real_t tmp_kernel_op_61 = tmp_kernel_op_58 + tmp_kernel_op_60;
+                const real_t tmp_kernel_op_62 = tmp_kernel_op_17*pow(tmp_kernel_op_61, -0.50000000000000000);
+                const real_t tmp_kernel_op_63 = tmp_kernel_op_57*tmp_kernel_op_62;
+                const real_t tmp_kernel_op_64 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_57) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_59);
+                const real_t tmp_kernel_op_65 = pow(tmp_kernel_op_61, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_66 = tmp_kernel_op_65*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_64);
+                const real_t tmp_kernel_op_67 = tmp_kernel_op_59*tmp_kernel_op_62;
+                const real_t tmp_kernel_op_68 = tmp_kernel_op_65*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_64);
+                const real_t tmp_kernel_op_69 = tmp_kernel_op_57*tmp_kernel_op_59;
+                const real_t tmp_kernel_op_70 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_63 - tmp_kernel_op_60*tmp_kernel_op_66)*(tmp_kernel_op_13*tmp_kernel_op_67 + tmp_kernel_op_58*tmp_kernel_op_68) - (tmp_kernel_op_0*tmp_kernel_op_67 + tmp_kernel_op_66*tmp_kernel_op_69)*(tmp_kernel_op_13*tmp_kernel_op_63 - tmp_kernel_op_68*tmp_kernel_op_69));
+                const real_t tmp_kernel_op_71 = tmp_kernel_op_70*0.16666666666666671;
+                const real_t tmp_kernel_op_72 = p_affine_0_0 + tmp_kernel_op_29*0.66666666666666663 + tmp_kernel_op_30*0.16666666666666666;
+                const real_t tmp_kernel_op_73 = (tmp_kernel_op_72*tmp_kernel_op_72);
+                const real_t tmp_kernel_op_74 = p_affine_0_1 + tmp_kernel_op_33*0.66666666666666663 + tmp_kernel_op_34*0.16666666666666666;
+                const real_t tmp_kernel_op_75 = (tmp_kernel_op_74*tmp_kernel_op_74);
+                const real_t tmp_kernel_op_76 = tmp_kernel_op_73 + tmp_kernel_op_75;
+                const real_t tmp_kernel_op_77 = tmp_kernel_op_40*pow(tmp_kernel_op_76, -0.50000000000000000);
+                const real_t tmp_kernel_op_78 = tmp_kernel_op_72*tmp_kernel_op_77;
+                const real_t tmp_kernel_op_79 = pow(tmp_kernel_op_76, -1.5000000000000000);
+                const real_t tmp_kernel_op_80 = radRayVertex + tmp_kernel_op_39*(tmp_kernel_op_28*(tmp_kernel_op_45 + tmp_kernel_op_72) - tmp_kernel_op_38*(tmp_kernel_op_44 + tmp_kernel_op_74));
+                const real_t tmp_kernel_op_81 = tmp_kernel_op_79*tmp_kernel_op_80*1.0;
+                const real_t tmp_kernel_op_82 = tmp_kernel_op_28*tmp_kernel_op_78 + tmp_kernel_op_75*tmp_kernel_op_81;
+                const real_t tmp_kernel_op_83 = tmp_kernel_op_74*tmp_kernel_op_77;
+                const real_t tmp_kernel_op_84 = tmp_kernel_op_38*tmp_kernel_op_78 + tmp_kernel_op_72*tmp_kernel_op_74*tmp_kernel_op_79*tmp_kernel_op_80*1.0;
+                const real_t tmp_kernel_op_85 = 1.0 / (tmp_kernel_op_82*(-tmp_kernel_op_38*tmp_kernel_op_83 + tmp_kernel_op_73*tmp_kernel_op_79*tmp_kernel_op_80*1.0) + tmp_kernel_op_84*(tmp_kernel_op_28*tmp_kernel_op_83 - tmp_kernel_op_72*tmp_kernel_op_74*tmp_kernel_op_81));
+                const real_t tmp_kernel_op_86 = tmp_kernel_op_82*tmp_kernel_op_85;
+                const real_t tmp_kernel_op_87 = tmp_kernel_op_84*tmp_kernel_op_85;
+                const real_t tmp_kernel_op_88 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_87 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_86;
+                const real_t tmp_kernel_op_90 = tmp_kernel_op_88*(tmp_kernel_op_89 - 1.0);
+                const real_t tmp_kernel_op_91 = tmp_kernel_op_1 + tmp_kernel_op_2*0.16666666666666666 + tmp_kernel_op_3*0.16666666666666666;
+                const real_t tmp_kernel_op_92 = (tmp_kernel_op_91*tmp_kernel_op_91);
+                const real_t tmp_kernel_op_93 = tmp_kernel_op_6 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+                const real_t tmp_kernel_op_94 = (tmp_kernel_op_93*tmp_kernel_op_93);
+                const real_t tmp_kernel_op_95 = tmp_kernel_op_92 + tmp_kernel_op_94;
+                const real_t tmp_kernel_op_96 = tmp_kernel_op_17*pow(tmp_kernel_op_95, -0.50000000000000000);
+                const real_t tmp_kernel_op_97 = tmp_kernel_op_91*tmp_kernel_op_96;
+                const real_t tmp_kernel_op_98 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_91) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_93);
+                const real_t tmp_kernel_op_99 = pow(tmp_kernel_op_95, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_100 = tmp_kernel_op_99*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_98);
+                const real_t tmp_kernel_op_101 = tmp_kernel_op_93*tmp_kernel_op_96;
+                const real_t tmp_kernel_op_102 = tmp_kernel_op_99*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_98);
+                const real_t tmp_kernel_op_103 = tmp_kernel_op_91*tmp_kernel_op_93;
+                const real_t tmp_kernel_op_104 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_101 + tmp_kernel_op_100*tmp_kernel_op_103)*(tmp_kernel_op_102*tmp_kernel_op_103 - tmp_kernel_op_13*tmp_kernel_op_97) + (tmp_kernel_op_0*tmp_kernel_op_97 - tmp_kernel_op_100*tmp_kernel_op_94)*(tmp_kernel_op_101*tmp_kernel_op_13 + tmp_kernel_op_102*tmp_kernel_op_92));
+                const real_t tmp_kernel_op_105 = tmp_kernel_op_104*0.66666666666666674;
+                const real_t tmp_kernel_op_106 = p_affine_0_0 + tmp_kernel_op_29*0.16666666666666666 + tmp_kernel_op_30*0.16666666666666666;
+                const real_t tmp_kernel_op_107 = (tmp_kernel_op_106*tmp_kernel_op_106);
+                const real_t tmp_kernel_op_108 = p_affine_0_1 + tmp_kernel_op_33*0.16666666666666666 + tmp_kernel_op_34*0.16666666666666666;
+                const real_t tmp_kernel_op_109 = (tmp_kernel_op_108*tmp_kernel_op_108);
+                const real_t tmp_kernel_op_110 = tmp_kernel_op_107 + tmp_kernel_op_109;
+                const real_t tmp_kernel_op_111 = pow(tmp_kernel_op_110, -0.50000000000000000)*tmp_kernel_op_40;
+                const real_t tmp_kernel_op_112 = tmp_kernel_op_106*tmp_kernel_op_111;
+                const real_t tmp_kernel_op_113 = pow(tmp_kernel_op_110, -1.5000000000000000);
+                const real_t tmp_kernel_op_114 = radRayVertex + tmp_kernel_op_39*(tmp_kernel_op_28*(tmp_kernel_op_106 + tmp_kernel_op_45) - tmp_kernel_op_38*(tmp_kernel_op_108 + tmp_kernel_op_44));
+                const real_t tmp_kernel_op_115 = tmp_kernel_op_113*tmp_kernel_op_114*1.0;
+                const real_t tmp_kernel_op_116 = tmp_kernel_op_109*tmp_kernel_op_115 + tmp_kernel_op_112*tmp_kernel_op_28;
+                const real_t tmp_kernel_op_117 = tmp_kernel_op_108*tmp_kernel_op_111;
+                const real_t tmp_kernel_op_118 = tmp_kernel_op_106*tmp_kernel_op_108*tmp_kernel_op_113*tmp_kernel_op_114*1.0 + tmp_kernel_op_112*tmp_kernel_op_38;
+                const real_t tmp_kernel_op_119 = 1.0 / (tmp_kernel_op_116*(tmp_kernel_op_107*tmp_kernel_op_113*tmp_kernel_op_114*1.0 - tmp_kernel_op_117*tmp_kernel_op_38) + tmp_kernel_op_118*(-tmp_kernel_op_106*tmp_kernel_op_108*tmp_kernel_op_115 + tmp_kernel_op_117*tmp_kernel_op_28));
+                const real_t tmp_kernel_op_120 = tmp_kernel_op_116*tmp_kernel_op_119;
+                const real_t tmp_kernel_op_121 = tmp_kernel_op_118*tmp_kernel_op_119;
+                const real_t tmp_kernel_op_122 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_121 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_120;
+                const real_t tmp_kernel_op_124 = tmp_kernel_op_122*(tmp_kernel_op_123 - 1.0);
+                const real_t tmp_kernel_op_125 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_53 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_52;
+                const real_t tmp_kernel_op_127 = tmp_kernel_op_125*(tmp_kernel_op_126 - 1.0);
+                const real_t tmp_kernel_op_128 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_87 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_86;
+                const real_t tmp_kernel_op_130 = tmp_kernel_op_128*(tmp_kernel_op_129 - 1.0);
+                const real_t tmp_kernel_op_131 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_121 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_120;
+                const real_t tmp_kernel_op_133 = tmp_kernel_op_131*(tmp_kernel_op_132 - 1.0);
+                const real_t tmp_kernel_op_134 = tmp_kernel_op_125*tmp_kernel_op_55;
+                const real_t tmp_kernel_op_135 = tmp_kernel_op_126*tmp_kernel_op_54;
+                const real_t tmp_kernel_op_136 = -tmp_kernel_op_134 - tmp_kernel_op_135;
+                const real_t tmp_kernel_op_137 = tmp_kernel_op_128*tmp_kernel_op_89;
+                const real_t tmp_kernel_op_138 = tmp_kernel_op_129*tmp_kernel_op_88;
+                const real_t tmp_kernel_op_139 = -tmp_kernel_op_137 - tmp_kernel_op_138;
+                const real_t tmp_kernel_op_140 = tmp_kernel_op_123*tmp_kernel_op_131;
+                const real_t tmp_kernel_op_141 = tmp_kernel_op_122*tmp_kernel_op_132;
+                const real_t tmp_kernel_op_142 = -tmp_kernel_op_140 - tmp_kernel_op_141;
+                const real_t tmp_kernel_op_143 = -tmp_kernel_op_125*(-tmp_kernel_op_55 - 1.333333333333333) + tmp_kernel_op_135;
+                const real_t tmp_kernel_op_144 = -tmp_kernel_op_128*(-tmp_kernel_op_89 + 2.666666666666667) + tmp_kernel_op_138;
+                const real_t tmp_kernel_op_145 = -tmp_kernel_op_131*(-tmp_kernel_op_123 + 2.666666666666667) + tmp_kernel_op_141;
+                const real_t tmp_kernel_op_146 = tmp_kernel_op_134 - tmp_kernel_op_54*(-tmp_kernel_op_126 + 2.666666666666667);
+                const real_t tmp_kernel_op_147 = tmp_kernel_op_137 - tmp_kernel_op_88*(-tmp_kernel_op_129 - 1.333333333333333);
+                const real_t tmp_kernel_op_148 = -tmp_kernel_op_122*(-tmp_kernel_op_132 + 2.666666666666667) + tmp_kernel_op_140;
+                const real_t tmp_kernel_op_150 = -tmp_kernel_op_125*tmp_kernel_op_149 - tmp_kernel_op_149*tmp_kernel_op_54;
+                const real_t tmp_kernel_op_152 = -tmp_kernel_op_128*tmp_kernel_op_151 - tmp_kernel_op_151*tmp_kernel_op_88;
+                const real_t tmp_kernel_op_154 = -tmp_kernel_op_122*tmp_kernel_op_153 - tmp_kernel_op_131*tmp_kernel_op_153;
+                const real_t tmp_kernel_op_155 = tmp_kernel_op_26*0.16666666666666666;
+                const real_t tmp_kernel_op_156 = tmp_kernel_op_70*0.66666666666666663;
+                const real_t tmp_kernel_op_157 = tmp_kernel_op_104*0.16666666666666666;
+                const real_t tmp_kernel_op_158 = tmp_kernel_op_26*0.66666666666666663;
+                const real_t tmp_kernel_op_159 = tmp_kernel_op_70*0.16666666666666666;
+                const real_t tmp_kernel_op_160 = tmp_kernel_op_104*0.16666666666666666;
+                const real_t elMatVec_0 = src_dof_0*(tmp_kernel_op_105*tmp_kernel_op_154 + tmp_kernel_op_150*tmp_kernel_op_27 + tmp_kernel_op_152*tmp_kernel_op_71) + src_dof_1*(-tmp_kernel_op_105*tmp_kernel_op_124 - tmp_kernel_op_27*tmp_kernel_op_56 - tmp_kernel_op_71*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_105*tmp_kernel_op_133 - tmp_kernel_op_127*tmp_kernel_op_27 - tmp_kernel_op_130*tmp_kernel_op_71) + src_dof_3*(tmp_kernel_op_105*tmp_kernel_op_142 + tmp_kernel_op_136*tmp_kernel_op_27 + tmp_kernel_op_139*tmp_kernel_op_71) + src_dof_4*(tmp_kernel_op_105*tmp_kernel_op_145 + tmp_kernel_op_143*tmp_kernel_op_27 + tmp_kernel_op_144*tmp_kernel_op_71) + src_dof_5*(tmp_kernel_op_105*tmp_kernel_op_148 + tmp_kernel_op_146*tmp_kernel_op_27 + tmp_kernel_op_147*tmp_kernel_op_71);
+                const real_t elMatVec_1 = src_dof_0*(tmp_kernel_op_150*tmp_kernel_op_155 + tmp_kernel_op_152*tmp_kernel_op_156 + tmp_kernel_op_154*tmp_kernel_op_157) + src_dof_1*(-tmp_kernel_op_124*tmp_kernel_op_157 - tmp_kernel_op_155*tmp_kernel_op_56 - tmp_kernel_op_156*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_127*tmp_kernel_op_155 - tmp_kernel_op_130*tmp_kernel_op_156 - tmp_kernel_op_133*tmp_kernel_op_157) + src_dof_3*(tmp_kernel_op_136*tmp_kernel_op_155 + tmp_kernel_op_139*tmp_kernel_op_156 + tmp_kernel_op_142*tmp_kernel_op_157) + src_dof_4*(tmp_kernel_op_143*tmp_kernel_op_155 + tmp_kernel_op_144*tmp_kernel_op_156 + tmp_kernel_op_145*tmp_kernel_op_157) + src_dof_5*(tmp_kernel_op_146*tmp_kernel_op_155 + tmp_kernel_op_147*tmp_kernel_op_156 + tmp_kernel_op_148*tmp_kernel_op_157);
+                const real_t elMatVec_2 = src_dof_0*(tmp_kernel_op_150*tmp_kernel_op_158 + tmp_kernel_op_152*tmp_kernel_op_159 + tmp_kernel_op_154*tmp_kernel_op_160) + src_dof_1*(-tmp_kernel_op_124*tmp_kernel_op_160 - tmp_kernel_op_158*tmp_kernel_op_56 - tmp_kernel_op_159*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_127*tmp_kernel_op_158 - tmp_kernel_op_130*tmp_kernel_op_159 - tmp_kernel_op_133*tmp_kernel_op_160) + src_dof_3*(tmp_kernel_op_136*tmp_kernel_op_158 + tmp_kernel_op_139*tmp_kernel_op_159 + tmp_kernel_op_142*tmp_kernel_op_160) + src_dof_4*(tmp_kernel_op_143*tmp_kernel_op_158 + tmp_kernel_op_144*tmp_kernel_op_159 + tmp_kernel_op_145*tmp_kernel_op_160) + src_dof_5*(tmp_kernel_op_146*tmp_kernel_op_158 + tmp_kernel_op_147*tmp_kernel_op_159 + tmp_kernel_op_148*tmp_kernel_op_160);
+                _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3928d86c6bf0f613469f7b3255775903f6d5a190
--- /dev/null
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp
@@ -0,0 +1,484 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ToP1ElementwiseDivergenceAnnulusMap_0_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_kernel_op_0 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_kernel_op_12 = rayVertex_0 - refVertex_0;
+       const real_t tmp_kernel_op_13 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_kernel_op_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_kernel_op_15 = radRayVertex - radRefVertex;
+       const real_t tmp_kernel_op_16 = tmp_kernel_op_15*1.0 / (tmp_kernel_op_0*tmp_kernel_op_12 - tmp_kernel_op_13*tmp_kernel_op_14);
+       const real_t tmp_kernel_op_17 = tmp_kernel_op_16*1.0;
+       const real_t tmp_kernel_op_28 = -tmp_kernel_op_13;
+       const real_t tmp_kernel_op_38 = -tmp_kernel_op_0;
+       const real_t tmp_kernel_op_39 = -tmp_kernel_op_15*1.0 / (-tmp_kernel_op_12*tmp_kernel_op_38 + tmp_kernel_op_14*tmp_kernel_op_28);
+       const real_t tmp_kernel_op_40 = tmp_kernel_op_39*1.0;
+       const real_t tmp_kernel_op_44 = -rayVertex_1;
+       const real_t tmp_kernel_op_45 = -rayVertex_0;
+       const real_t tmp_kernel_op_55 = 0.66666666666666663;
+       const real_t tmp_kernel_op_89 = 2.6666666666666665;
+       const real_t tmp_kernel_op_123 = 0.66666666666666663;
+       const real_t tmp_kernel_op_126 = 2.6666666666666665;
+       const real_t tmp_kernel_op_129 = 0.66666666666666663;
+       const real_t tmp_kernel_op_132 = 0.66666666666666663;
+       const real_t tmp_kernel_op_149 = tmp_kernel_op_126 + tmp_kernel_op_55 - 3.0;
+       const real_t tmp_kernel_op_151 = tmp_kernel_op_129 + tmp_kernel_op_89 - 3.0;
+       const real_t tmp_kernel_op_153 = tmp_kernel_op_123 + tmp_kernel_op_132 - 3.0;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t tmp_kernel_op_1 = -p_affine_0_0;
+             const real_t tmp_kernel_op_2 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_kernel_op_3 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_kernel_op_4 = tmp_kernel_op_1 + tmp_kernel_op_2*0.16666666666666666 + tmp_kernel_op_3*0.66666666666666663;
+             const real_t tmp_kernel_op_5 = (tmp_kernel_op_4*tmp_kernel_op_4);
+             const real_t tmp_kernel_op_6 = -p_affine_0_1;
+             const real_t tmp_kernel_op_7 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_kernel_op_8 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_kernel_op_9 = tmp_kernel_op_6 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+             const real_t tmp_kernel_op_10 = (tmp_kernel_op_9*tmp_kernel_op_9);
+             const real_t tmp_kernel_op_11 = tmp_kernel_op_10 + tmp_kernel_op_5;
+             const real_t tmp_kernel_op_18 = pow(tmp_kernel_op_11, -0.50000000000000000)*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_19 = tmp_kernel_op_18*tmp_kernel_op_4;
+             const real_t tmp_kernel_op_20 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_4) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_9);
+             const real_t tmp_kernel_op_21 = pow(tmp_kernel_op_11, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_22 = tmp_kernel_op_21*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_20);
+             const real_t tmp_kernel_op_23 = tmp_kernel_op_18*tmp_kernel_op_9;
+             const real_t tmp_kernel_op_24 = tmp_kernel_op_21*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_20);
+             const real_t tmp_kernel_op_25 = tmp_kernel_op_4*tmp_kernel_op_9;
+             const real_t tmp_kernel_op_26 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_19 - tmp_kernel_op_10*tmp_kernel_op_22)*(tmp_kernel_op_13*tmp_kernel_op_23 + tmp_kernel_op_24*tmp_kernel_op_5) - (tmp_kernel_op_0*tmp_kernel_op_23 + tmp_kernel_op_22*tmp_kernel_op_25)*(tmp_kernel_op_13*tmp_kernel_op_19 - tmp_kernel_op_24*tmp_kernel_op_25));
+             const real_t tmp_kernel_op_27 = tmp_kernel_op_26*0.16666666666666674;
+             const real_t tmp_kernel_op_29 = -tmp_kernel_op_7;
+             const real_t tmp_kernel_op_30 = -tmp_kernel_op_8;
+             const real_t tmp_kernel_op_31 = p_affine_0_1 + tmp_kernel_op_29*0.16666666666666666 + tmp_kernel_op_30*0.66666666666666663;
+             const real_t tmp_kernel_op_32 = -tmp_kernel_op_2;
+             const real_t tmp_kernel_op_33 = -tmp_kernel_op_3;
+             const real_t tmp_kernel_op_34 = p_affine_0_0 + tmp_kernel_op_32*0.16666666666666666 + tmp_kernel_op_33*0.66666666666666663;
+             const real_t tmp_kernel_op_35 = (tmp_kernel_op_34*tmp_kernel_op_34);
+             const real_t tmp_kernel_op_36 = (tmp_kernel_op_31*tmp_kernel_op_31);
+             const real_t tmp_kernel_op_37 = tmp_kernel_op_35 + tmp_kernel_op_36;
+             const real_t tmp_kernel_op_41 = pow(tmp_kernel_op_37, -0.50000000000000000)*tmp_kernel_op_40;
+             const real_t tmp_kernel_op_42 = tmp_kernel_op_31*tmp_kernel_op_41;
+             const real_t tmp_kernel_op_43 = pow(tmp_kernel_op_37, -1.5000000000000000);
+             const real_t tmp_kernel_op_46 = radRayVertex + tmp_kernel_op_39*(-tmp_kernel_op_28*(tmp_kernel_op_31 + tmp_kernel_op_44) + tmp_kernel_op_38*(tmp_kernel_op_34 + tmp_kernel_op_45));
+             const real_t tmp_kernel_op_47 = -tmp_kernel_op_28*tmp_kernel_op_42 + tmp_kernel_op_35*tmp_kernel_op_43*tmp_kernel_op_46*1.0;
+             const real_t tmp_kernel_op_48 = tmp_kernel_op_34*tmp_kernel_op_41;
+             const real_t tmp_kernel_op_49 = tmp_kernel_op_43*tmp_kernel_op_46*1.0;
+             const real_t tmp_kernel_op_50 = -tmp_kernel_op_31*tmp_kernel_op_34*tmp_kernel_op_49 + tmp_kernel_op_38*tmp_kernel_op_42;
+             const real_t tmp_kernel_op_51 = 1.0 / (tmp_kernel_op_47*(tmp_kernel_op_36*tmp_kernel_op_49 + tmp_kernel_op_38*tmp_kernel_op_48) - tmp_kernel_op_50*(-tmp_kernel_op_28*tmp_kernel_op_48 - tmp_kernel_op_31*tmp_kernel_op_34*tmp_kernel_op_43*tmp_kernel_op_46));
+             const real_t tmp_kernel_op_52 = tmp_kernel_op_47*tmp_kernel_op_51;
+             const real_t tmp_kernel_op_53 = -tmp_kernel_op_50*tmp_kernel_op_51;
+             const real_t tmp_kernel_op_54 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_52 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_53;
+             const real_t tmp_kernel_op_56 = tmp_kernel_op_54*(tmp_kernel_op_55 - 1.0);
+             const real_t tmp_kernel_op_57 = tmp_kernel_op_1 + tmp_kernel_op_2*0.66666666666666663 + tmp_kernel_op_3*0.16666666666666666;
+             const real_t tmp_kernel_op_58 = (tmp_kernel_op_57*tmp_kernel_op_57);
+             const real_t tmp_kernel_op_59 = tmp_kernel_op_6 + tmp_kernel_op_7*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_60 = (tmp_kernel_op_59*tmp_kernel_op_59);
+             const real_t tmp_kernel_op_61 = tmp_kernel_op_58 + tmp_kernel_op_60;
+             const real_t tmp_kernel_op_62 = tmp_kernel_op_17*pow(tmp_kernel_op_61, -0.50000000000000000);
+             const real_t tmp_kernel_op_63 = tmp_kernel_op_57*tmp_kernel_op_62;
+             const real_t tmp_kernel_op_64 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_57) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_59);
+             const real_t tmp_kernel_op_65 = pow(tmp_kernel_op_61, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_66 = tmp_kernel_op_65*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_64);
+             const real_t tmp_kernel_op_67 = tmp_kernel_op_59*tmp_kernel_op_62;
+             const real_t tmp_kernel_op_68 = tmp_kernel_op_65*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_64);
+             const real_t tmp_kernel_op_69 = tmp_kernel_op_57*tmp_kernel_op_59;
+             const real_t tmp_kernel_op_70 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_63 - tmp_kernel_op_60*tmp_kernel_op_66)*(tmp_kernel_op_13*tmp_kernel_op_67 + tmp_kernel_op_58*tmp_kernel_op_68) - (tmp_kernel_op_0*tmp_kernel_op_67 + tmp_kernel_op_66*tmp_kernel_op_69)*(tmp_kernel_op_13*tmp_kernel_op_63 - tmp_kernel_op_68*tmp_kernel_op_69));
+             const real_t tmp_kernel_op_71 = tmp_kernel_op_70*0.16666666666666671;
+             const real_t tmp_kernel_op_72 = p_affine_0_1 + tmp_kernel_op_29*0.66666666666666663 + tmp_kernel_op_30*0.16666666666666666;
+             const real_t tmp_kernel_op_73 = p_affine_0_0 + tmp_kernel_op_32*0.66666666666666663 + tmp_kernel_op_33*0.16666666666666666;
+             const real_t tmp_kernel_op_74 = (tmp_kernel_op_73*tmp_kernel_op_73);
+             const real_t tmp_kernel_op_75 = (tmp_kernel_op_72*tmp_kernel_op_72);
+             const real_t tmp_kernel_op_76 = tmp_kernel_op_74 + tmp_kernel_op_75;
+             const real_t tmp_kernel_op_77 = tmp_kernel_op_40*pow(tmp_kernel_op_76, -0.50000000000000000);
+             const real_t tmp_kernel_op_78 = tmp_kernel_op_72*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_79 = pow(tmp_kernel_op_76, -1.5000000000000000);
+             const real_t tmp_kernel_op_80 = radRayVertex + tmp_kernel_op_39*(-tmp_kernel_op_28*(tmp_kernel_op_44 + tmp_kernel_op_72) + tmp_kernel_op_38*(tmp_kernel_op_45 + tmp_kernel_op_73));
+             const real_t tmp_kernel_op_81 = -tmp_kernel_op_28*tmp_kernel_op_78 + tmp_kernel_op_74*tmp_kernel_op_79*tmp_kernel_op_80*1.0;
+             const real_t tmp_kernel_op_82 = tmp_kernel_op_73*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_83 = tmp_kernel_op_79*tmp_kernel_op_80*1.0;
+             const real_t tmp_kernel_op_84 = tmp_kernel_op_38*tmp_kernel_op_78 - tmp_kernel_op_72*tmp_kernel_op_73*tmp_kernel_op_83;
+             const real_t tmp_kernel_op_85 = 1.0 / (tmp_kernel_op_81*(tmp_kernel_op_38*tmp_kernel_op_82 + tmp_kernel_op_75*tmp_kernel_op_83) - tmp_kernel_op_84*(-tmp_kernel_op_28*tmp_kernel_op_82 - tmp_kernel_op_72*tmp_kernel_op_73*tmp_kernel_op_79*tmp_kernel_op_80));
+             const real_t tmp_kernel_op_86 = tmp_kernel_op_81*tmp_kernel_op_85;
+             const real_t tmp_kernel_op_87 = -tmp_kernel_op_84*tmp_kernel_op_85;
+             const real_t tmp_kernel_op_88 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_86 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_90 = tmp_kernel_op_88*(tmp_kernel_op_89 - 1.0);
+             const real_t tmp_kernel_op_91 = tmp_kernel_op_1 + tmp_kernel_op_2*0.16666666666666666 + tmp_kernel_op_3*0.16666666666666666;
+             const real_t tmp_kernel_op_92 = (tmp_kernel_op_91*tmp_kernel_op_91);
+             const real_t tmp_kernel_op_93 = tmp_kernel_op_6 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_94 = (tmp_kernel_op_93*tmp_kernel_op_93);
+             const real_t tmp_kernel_op_95 = tmp_kernel_op_92 + tmp_kernel_op_94;
+             const real_t tmp_kernel_op_96 = tmp_kernel_op_17*pow(tmp_kernel_op_95, -0.50000000000000000);
+             const real_t tmp_kernel_op_97 = tmp_kernel_op_91*tmp_kernel_op_96;
+             const real_t tmp_kernel_op_98 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_91) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_93);
+             const real_t tmp_kernel_op_99 = pow(tmp_kernel_op_95, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_100 = tmp_kernel_op_99*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_98);
+             const real_t tmp_kernel_op_101 = tmp_kernel_op_93*tmp_kernel_op_96;
+             const real_t tmp_kernel_op_102 = tmp_kernel_op_99*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_98);
+             const real_t tmp_kernel_op_103 = tmp_kernel_op_91*tmp_kernel_op_93;
+             const real_t tmp_kernel_op_104 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_101 + tmp_kernel_op_100*tmp_kernel_op_103)*(tmp_kernel_op_102*tmp_kernel_op_103 - tmp_kernel_op_13*tmp_kernel_op_97) + (tmp_kernel_op_0*tmp_kernel_op_97 - tmp_kernel_op_100*tmp_kernel_op_94)*(tmp_kernel_op_101*tmp_kernel_op_13 + tmp_kernel_op_102*tmp_kernel_op_92));
+             const real_t tmp_kernel_op_105 = tmp_kernel_op_104*0.66666666666666674;
+             const real_t tmp_kernel_op_106 = p_affine_0_1 + tmp_kernel_op_29*0.16666666666666666 + tmp_kernel_op_30*0.16666666666666666;
+             const real_t tmp_kernel_op_107 = p_affine_0_0 + tmp_kernel_op_32*0.16666666666666666 + tmp_kernel_op_33*0.16666666666666666;
+             const real_t tmp_kernel_op_108 = (tmp_kernel_op_107*tmp_kernel_op_107);
+             const real_t tmp_kernel_op_109 = (tmp_kernel_op_106*tmp_kernel_op_106);
+             const real_t tmp_kernel_op_110 = tmp_kernel_op_108 + tmp_kernel_op_109;
+             const real_t tmp_kernel_op_111 = pow(tmp_kernel_op_110, -0.50000000000000000)*tmp_kernel_op_40;
+             const real_t tmp_kernel_op_112 = tmp_kernel_op_106*tmp_kernel_op_111;
+             const real_t tmp_kernel_op_113 = pow(tmp_kernel_op_110, -1.5000000000000000);
+             const real_t tmp_kernel_op_114 = radRayVertex + tmp_kernel_op_39*(-tmp_kernel_op_28*(tmp_kernel_op_106 + tmp_kernel_op_44) + tmp_kernel_op_38*(tmp_kernel_op_107 + tmp_kernel_op_45));
+             const real_t tmp_kernel_op_115 = tmp_kernel_op_108*tmp_kernel_op_113*tmp_kernel_op_114*1.0 - tmp_kernel_op_112*tmp_kernel_op_28;
+             const real_t tmp_kernel_op_116 = tmp_kernel_op_107*tmp_kernel_op_111;
+             const real_t tmp_kernel_op_117 = tmp_kernel_op_113*tmp_kernel_op_114*1.0;
+             const real_t tmp_kernel_op_118 = -tmp_kernel_op_106*tmp_kernel_op_107*tmp_kernel_op_117 + tmp_kernel_op_112*tmp_kernel_op_38;
+             const real_t tmp_kernel_op_119 = 1.0 / (tmp_kernel_op_115*(tmp_kernel_op_109*tmp_kernel_op_117 + tmp_kernel_op_116*tmp_kernel_op_38) - tmp_kernel_op_118*(-tmp_kernel_op_106*tmp_kernel_op_107*tmp_kernel_op_113*tmp_kernel_op_114 - tmp_kernel_op_116*tmp_kernel_op_28));
+             const real_t tmp_kernel_op_120 = tmp_kernel_op_115*tmp_kernel_op_119;
+             const real_t tmp_kernel_op_121 = -tmp_kernel_op_118*tmp_kernel_op_119;
+             const real_t tmp_kernel_op_122 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_120 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_124 = tmp_kernel_op_122*(tmp_kernel_op_123 - 1.0);
+             const real_t tmp_kernel_op_125 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_52 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_53;
+             const real_t tmp_kernel_op_127 = tmp_kernel_op_125*(tmp_kernel_op_126 - 1.0);
+             const real_t tmp_kernel_op_128 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_86 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_130 = tmp_kernel_op_128*(tmp_kernel_op_129 - 1.0);
+             const real_t tmp_kernel_op_131 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_120 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_133 = tmp_kernel_op_131*(tmp_kernel_op_132 - 1.0);
+             const real_t tmp_kernel_op_134 = tmp_kernel_op_125*tmp_kernel_op_55;
+             const real_t tmp_kernel_op_135 = tmp_kernel_op_126*tmp_kernel_op_54;
+             const real_t tmp_kernel_op_136 = -tmp_kernel_op_134 - tmp_kernel_op_135;
+             const real_t tmp_kernel_op_137 = tmp_kernel_op_128*tmp_kernel_op_89;
+             const real_t tmp_kernel_op_138 = tmp_kernel_op_129*tmp_kernel_op_88;
+             const real_t tmp_kernel_op_139 = -tmp_kernel_op_137 - tmp_kernel_op_138;
+             const real_t tmp_kernel_op_140 = tmp_kernel_op_123*tmp_kernel_op_131;
+             const real_t tmp_kernel_op_141 = tmp_kernel_op_122*tmp_kernel_op_132;
+             const real_t tmp_kernel_op_142 = -tmp_kernel_op_140 - tmp_kernel_op_141;
+             const real_t tmp_kernel_op_143 = -tmp_kernel_op_125*(-tmp_kernel_op_55 - 1.333333333333333) + tmp_kernel_op_135;
+             const real_t tmp_kernel_op_144 = -tmp_kernel_op_128*(-tmp_kernel_op_89 + 2.666666666666667) + tmp_kernel_op_138;
+             const real_t tmp_kernel_op_145 = -tmp_kernel_op_131*(-tmp_kernel_op_123 + 2.666666666666667) + tmp_kernel_op_141;
+             const real_t tmp_kernel_op_146 = tmp_kernel_op_134 - tmp_kernel_op_54*(-tmp_kernel_op_126 + 2.666666666666667);
+             const real_t tmp_kernel_op_147 = tmp_kernel_op_137 - tmp_kernel_op_88*(-tmp_kernel_op_129 - 1.333333333333333);
+             const real_t tmp_kernel_op_148 = -tmp_kernel_op_122*(-tmp_kernel_op_132 + 2.666666666666667) + tmp_kernel_op_140;
+             const real_t tmp_kernel_op_150 = -tmp_kernel_op_125*tmp_kernel_op_149 - tmp_kernel_op_149*tmp_kernel_op_54;
+             const real_t tmp_kernel_op_152 = -tmp_kernel_op_128*tmp_kernel_op_151 - tmp_kernel_op_151*tmp_kernel_op_88;
+             const real_t tmp_kernel_op_154 = -tmp_kernel_op_122*tmp_kernel_op_153 - tmp_kernel_op_131*tmp_kernel_op_153;
+             const real_t tmp_kernel_op_155 = tmp_kernel_op_26*0.16666666666666666;
+             const real_t tmp_kernel_op_156 = tmp_kernel_op_70*0.66666666666666663;
+             const real_t tmp_kernel_op_157 = tmp_kernel_op_104*0.16666666666666666;
+             const real_t tmp_kernel_op_158 = tmp_kernel_op_26*0.66666666666666663;
+             const real_t tmp_kernel_op_159 = tmp_kernel_op_70*0.16666666666666666;
+             const real_t tmp_kernel_op_160 = tmp_kernel_op_104*0.16666666666666666;
+             const real_t elMatVec_0 = src_dof_0*(tmp_kernel_op_105*tmp_kernel_op_154 + tmp_kernel_op_150*tmp_kernel_op_27 + tmp_kernel_op_152*tmp_kernel_op_71) + src_dof_1*(-tmp_kernel_op_105*tmp_kernel_op_124 - tmp_kernel_op_27*tmp_kernel_op_56 - tmp_kernel_op_71*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_105*tmp_kernel_op_133 - tmp_kernel_op_127*tmp_kernel_op_27 - tmp_kernel_op_130*tmp_kernel_op_71) + src_dof_3*(tmp_kernel_op_105*tmp_kernel_op_142 + tmp_kernel_op_136*tmp_kernel_op_27 + tmp_kernel_op_139*tmp_kernel_op_71) + src_dof_4*(tmp_kernel_op_105*tmp_kernel_op_145 + tmp_kernel_op_143*tmp_kernel_op_27 + tmp_kernel_op_144*tmp_kernel_op_71) + src_dof_5*(tmp_kernel_op_105*tmp_kernel_op_148 + tmp_kernel_op_146*tmp_kernel_op_27 + tmp_kernel_op_147*tmp_kernel_op_71);
+             const real_t elMatVec_1 = src_dof_0*(tmp_kernel_op_150*tmp_kernel_op_155 + tmp_kernel_op_152*tmp_kernel_op_156 + tmp_kernel_op_154*tmp_kernel_op_157) + src_dof_1*(-tmp_kernel_op_124*tmp_kernel_op_157 - tmp_kernel_op_155*tmp_kernel_op_56 - tmp_kernel_op_156*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_127*tmp_kernel_op_155 - tmp_kernel_op_130*tmp_kernel_op_156 - tmp_kernel_op_133*tmp_kernel_op_157) + src_dof_3*(tmp_kernel_op_136*tmp_kernel_op_155 + tmp_kernel_op_139*tmp_kernel_op_156 + tmp_kernel_op_142*tmp_kernel_op_157) + src_dof_4*(tmp_kernel_op_143*tmp_kernel_op_155 + tmp_kernel_op_144*tmp_kernel_op_156 + tmp_kernel_op_145*tmp_kernel_op_157) + src_dof_5*(tmp_kernel_op_146*tmp_kernel_op_155 + tmp_kernel_op_147*tmp_kernel_op_156 + tmp_kernel_op_148*tmp_kernel_op_157);
+             const real_t elMatVec_2 = src_dof_0*(tmp_kernel_op_150*tmp_kernel_op_158 + tmp_kernel_op_152*tmp_kernel_op_159 + tmp_kernel_op_154*tmp_kernel_op_160) + src_dof_1*(-tmp_kernel_op_124*tmp_kernel_op_160 - tmp_kernel_op_158*tmp_kernel_op_56 - tmp_kernel_op_159*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_127*tmp_kernel_op_158 - tmp_kernel_op_130*tmp_kernel_op_159 - tmp_kernel_op_133*tmp_kernel_op_160) + src_dof_3*(tmp_kernel_op_136*tmp_kernel_op_158 + tmp_kernel_op_139*tmp_kernel_op_159 + tmp_kernel_op_142*tmp_kernel_op_160) + src_dof_4*(tmp_kernel_op_143*tmp_kernel_op_158 + tmp_kernel_op_144*tmp_kernel_op_159 + tmp_kernel_op_145*tmp_kernel_op_160) + src_dof_5*(tmp_kernel_op_146*tmp_kernel_op_158 + tmp_kernel_op_147*tmp_kernel_op_159 + tmp_kernel_op_148*tmp_kernel_op_160);
+             _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t tmp_kernel_op_1 = -p_affine_0_0;
+             const real_t tmp_kernel_op_2 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_kernel_op_3 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_kernel_op_4 = tmp_kernel_op_1 + tmp_kernel_op_2*0.16666666666666666 + tmp_kernel_op_3*0.66666666666666663;
+             const real_t tmp_kernel_op_5 = (tmp_kernel_op_4*tmp_kernel_op_4);
+             const real_t tmp_kernel_op_6 = -p_affine_0_1;
+             const real_t tmp_kernel_op_7 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_kernel_op_8 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_kernel_op_9 = tmp_kernel_op_6 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+             const real_t tmp_kernel_op_10 = (tmp_kernel_op_9*tmp_kernel_op_9);
+             const real_t tmp_kernel_op_11 = tmp_kernel_op_10 + tmp_kernel_op_5;
+             const real_t tmp_kernel_op_18 = pow(tmp_kernel_op_11, -0.50000000000000000)*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_19 = tmp_kernel_op_18*tmp_kernel_op_4;
+             const real_t tmp_kernel_op_20 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_4) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_9);
+             const real_t tmp_kernel_op_21 = pow(tmp_kernel_op_11, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_22 = tmp_kernel_op_21*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_20);
+             const real_t tmp_kernel_op_23 = tmp_kernel_op_18*tmp_kernel_op_9;
+             const real_t tmp_kernel_op_24 = tmp_kernel_op_21*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_20);
+             const real_t tmp_kernel_op_25 = tmp_kernel_op_4*tmp_kernel_op_9;
+             const real_t tmp_kernel_op_26 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_19 - tmp_kernel_op_10*tmp_kernel_op_22)*(tmp_kernel_op_13*tmp_kernel_op_23 + tmp_kernel_op_24*tmp_kernel_op_5) - (tmp_kernel_op_0*tmp_kernel_op_23 + tmp_kernel_op_22*tmp_kernel_op_25)*(tmp_kernel_op_13*tmp_kernel_op_19 - tmp_kernel_op_24*tmp_kernel_op_25));
+             const real_t tmp_kernel_op_27 = tmp_kernel_op_26*0.16666666666666674;
+             const real_t tmp_kernel_op_29 = -tmp_kernel_op_7;
+             const real_t tmp_kernel_op_30 = -tmp_kernel_op_8;
+             const real_t tmp_kernel_op_31 = p_affine_0_1 + tmp_kernel_op_29*0.16666666666666666 + tmp_kernel_op_30*0.66666666666666663;
+             const real_t tmp_kernel_op_32 = -tmp_kernel_op_2;
+             const real_t tmp_kernel_op_33 = -tmp_kernel_op_3;
+             const real_t tmp_kernel_op_34 = p_affine_0_0 + tmp_kernel_op_32*0.16666666666666666 + tmp_kernel_op_33*0.66666666666666663;
+             const real_t tmp_kernel_op_35 = (tmp_kernel_op_34*tmp_kernel_op_34);
+             const real_t tmp_kernel_op_36 = (tmp_kernel_op_31*tmp_kernel_op_31);
+             const real_t tmp_kernel_op_37 = tmp_kernel_op_35 + tmp_kernel_op_36;
+             const real_t tmp_kernel_op_41 = pow(tmp_kernel_op_37, -0.50000000000000000)*tmp_kernel_op_40;
+             const real_t tmp_kernel_op_42 = tmp_kernel_op_31*tmp_kernel_op_41;
+             const real_t tmp_kernel_op_43 = pow(tmp_kernel_op_37, -1.5000000000000000);
+             const real_t tmp_kernel_op_46 = radRayVertex + tmp_kernel_op_39*(-tmp_kernel_op_28*(tmp_kernel_op_31 + tmp_kernel_op_44) + tmp_kernel_op_38*(tmp_kernel_op_34 + tmp_kernel_op_45));
+             const real_t tmp_kernel_op_47 = -tmp_kernel_op_28*tmp_kernel_op_42 + tmp_kernel_op_35*tmp_kernel_op_43*tmp_kernel_op_46*1.0;
+             const real_t tmp_kernel_op_48 = tmp_kernel_op_34*tmp_kernel_op_41;
+             const real_t tmp_kernel_op_49 = tmp_kernel_op_43*tmp_kernel_op_46*1.0;
+             const real_t tmp_kernel_op_50 = -tmp_kernel_op_31*tmp_kernel_op_34*tmp_kernel_op_49 + tmp_kernel_op_38*tmp_kernel_op_42;
+             const real_t tmp_kernel_op_51 = 1.0 / (tmp_kernel_op_47*(tmp_kernel_op_36*tmp_kernel_op_49 + tmp_kernel_op_38*tmp_kernel_op_48) - tmp_kernel_op_50*(-tmp_kernel_op_28*tmp_kernel_op_48 - tmp_kernel_op_31*tmp_kernel_op_34*tmp_kernel_op_43*tmp_kernel_op_46));
+             const real_t tmp_kernel_op_52 = tmp_kernel_op_47*tmp_kernel_op_51;
+             const real_t tmp_kernel_op_53 = -tmp_kernel_op_50*tmp_kernel_op_51;
+             const real_t tmp_kernel_op_54 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_52 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_53;
+             const real_t tmp_kernel_op_56 = tmp_kernel_op_54*(tmp_kernel_op_55 - 1.0);
+             const real_t tmp_kernel_op_57 = tmp_kernel_op_1 + tmp_kernel_op_2*0.66666666666666663 + tmp_kernel_op_3*0.16666666666666666;
+             const real_t tmp_kernel_op_58 = (tmp_kernel_op_57*tmp_kernel_op_57);
+             const real_t tmp_kernel_op_59 = tmp_kernel_op_6 + tmp_kernel_op_7*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_60 = (tmp_kernel_op_59*tmp_kernel_op_59);
+             const real_t tmp_kernel_op_61 = tmp_kernel_op_58 + tmp_kernel_op_60;
+             const real_t tmp_kernel_op_62 = tmp_kernel_op_17*pow(tmp_kernel_op_61, -0.50000000000000000);
+             const real_t tmp_kernel_op_63 = tmp_kernel_op_57*tmp_kernel_op_62;
+             const real_t tmp_kernel_op_64 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_57) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_59);
+             const real_t tmp_kernel_op_65 = pow(tmp_kernel_op_61, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_66 = tmp_kernel_op_65*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_64);
+             const real_t tmp_kernel_op_67 = tmp_kernel_op_59*tmp_kernel_op_62;
+             const real_t tmp_kernel_op_68 = tmp_kernel_op_65*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_64);
+             const real_t tmp_kernel_op_69 = tmp_kernel_op_57*tmp_kernel_op_59;
+             const real_t tmp_kernel_op_70 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_63 - tmp_kernel_op_60*tmp_kernel_op_66)*(tmp_kernel_op_13*tmp_kernel_op_67 + tmp_kernel_op_58*tmp_kernel_op_68) - (tmp_kernel_op_0*tmp_kernel_op_67 + tmp_kernel_op_66*tmp_kernel_op_69)*(tmp_kernel_op_13*tmp_kernel_op_63 - tmp_kernel_op_68*tmp_kernel_op_69));
+             const real_t tmp_kernel_op_71 = tmp_kernel_op_70*0.16666666666666671;
+             const real_t tmp_kernel_op_72 = p_affine_0_1 + tmp_kernel_op_29*0.66666666666666663 + tmp_kernel_op_30*0.16666666666666666;
+             const real_t tmp_kernel_op_73 = p_affine_0_0 + tmp_kernel_op_32*0.66666666666666663 + tmp_kernel_op_33*0.16666666666666666;
+             const real_t tmp_kernel_op_74 = (tmp_kernel_op_73*tmp_kernel_op_73);
+             const real_t tmp_kernel_op_75 = (tmp_kernel_op_72*tmp_kernel_op_72);
+             const real_t tmp_kernel_op_76 = tmp_kernel_op_74 + tmp_kernel_op_75;
+             const real_t tmp_kernel_op_77 = tmp_kernel_op_40*pow(tmp_kernel_op_76, -0.50000000000000000);
+             const real_t tmp_kernel_op_78 = tmp_kernel_op_72*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_79 = pow(tmp_kernel_op_76, -1.5000000000000000);
+             const real_t tmp_kernel_op_80 = radRayVertex + tmp_kernel_op_39*(-tmp_kernel_op_28*(tmp_kernel_op_44 + tmp_kernel_op_72) + tmp_kernel_op_38*(tmp_kernel_op_45 + tmp_kernel_op_73));
+             const real_t tmp_kernel_op_81 = -tmp_kernel_op_28*tmp_kernel_op_78 + tmp_kernel_op_74*tmp_kernel_op_79*tmp_kernel_op_80*1.0;
+             const real_t tmp_kernel_op_82 = tmp_kernel_op_73*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_83 = tmp_kernel_op_79*tmp_kernel_op_80*1.0;
+             const real_t tmp_kernel_op_84 = tmp_kernel_op_38*tmp_kernel_op_78 - tmp_kernel_op_72*tmp_kernel_op_73*tmp_kernel_op_83;
+             const real_t tmp_kernel_op_85 = 1.0 / (tmp_kernel_op_81*(tmp_kernel_op_38*tmp_kernel_op_82 + tmp_kernel_op_75*tmp_kernel_op_83) - tmp_kernel_op_84*(-tmp_kernel_op_28*tmp_kernel_op_82 - tmp_kernel_op_72*tmp_kernel_op_73*tmp_kernel_op_79*tmp_kernel_op_80));
+             const real_t tmp_kernel_op_86 = tmp_kernel_op_81*tmp_kernel_op_85;
+             const real_t tmp_kernel_op_87 = -tmp_kernel_op_84*tmp_kernel_op_85;
+             const real_t tmp_kernel_op_88 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_86 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_90 = tmp_kernel_op_88*(tmp_kernel_op_89 - 1.0);
+             const real_t tmp_kernel_op_91 = tmp_kernel_op_1 + tmp_kernel_op_2*0.16666666666666666 + tmp_kernel_op_3*0.16666666666666666;
+             const real_t tmp_kernel_op_92 = (tmp_kernel_op_91*tmp_kernel_op_91);
+             const real_t tmp_kernel_op_93 = tmp_kernel_op_6 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_94 = (tmp_kernel_op_93*tmp_kernel_op_93);
+             const real_t tmp_kernel_op_95 = tmp_kernel_op_92 + tmp_kernel_op_94;
+             const real_t tmp_kernel_op_96 = tmp_kernel_op_17*pow(tmp_kernel_op_95, -0.50000000000000000);
+             const real_t tmp_kernel_op_97 = tmp_kernel_op_91*tmp_kernel_op_96;
+             const real_t tmp_kernel_op_98 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_91) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_93);
+             const real_t tmp_kernel_op_99 = pow(tmp_kernel_op_95, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_100 = tmp_kernel_op_99*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_98);
+             const real_t tmp_kernel_op_101 = tmp_kernel_op_93*tmp_kernel_op_96;
+             const real_t tmp_kernel_op_102 = tmp_kernel_op_99*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_98);
+             const real_t tmp_kernel_op_103 = tmp_kernel_op_91*tmp_kernel_op_93;
+             const real_t tmp_kernel_op_104 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_101 + tmp_kernel_op_100*tmp_kernel_op_103)*(tmp_kernel_op_102*tmp_kernel_op_103 - tmp_kernel_op_13*tmp_kernel_op_97) + (tmp_kernel_op_0*tmp_kernel_op_97 - tmp_kernel_op_100*tmp_kernel_op_94)*(tmp_kernel_op_101*tmp_kernel_op_13 + tmp_kernel_op_102*tmp_kernel_op_92));
+             const real_t tmp_kernel_op_105 = tmp_kernel_op_104*0.66666666666666674;
+             const real_t tmp_kernel_op_106 = p_affine_0_1 + tmp_kernel_op_29*0.16666666666666666 + tmp_kernel_op_30*0.16666666666666666;
+             const real_t tmp_kernel_op_107 = p_affine_0_0 + tmp_kernel_op_32*0.16666666666666666 + tmp_kernel_op_33*0.16666666666666666;
+             const real_t tmp_kernel_op_108 = (tmp_kernel_op_107*tmp_kernel_op_107);
+             const real_t tmp_kernel_op_109 = (tmp_kernel_op_106*tmp_kernel_op_106);
+             const real_t tmp_kernel_op_110 = tmp_kernel_op_108 + tmp_kernel_op_109;
+             const real_t tmp_kernel_op_111 = pow(tmp_kernel_op_110, -0.50000000000000000)*tmp_kernel_op_40;
+             const real_t tmp_kernel_op_112 = tmp_kernel_op_106*tmp_kernel_op_111;
+             const real_t tmp_kernel_op_113 = pow(tmp_kernel_op_110, -1.5000000000000000);
+             const real_t tmp_kernel_op_114 = radRayVertex + tmp_kernel_op_39*(-tmp_kernel_op_28*(tmp_kernel_op_106 + tmp_kernel_op_44) + tmp_kernel_op_38*(tmp_kernel_op_107 + tmp_kernel_op_45));
+             const real_t tmp_kernel_op_115 = tmp_kernel_op_108*tmp_kernel_op_113*tmp_kernel_op_114*1.0 - tmp_kernel_op_112*tmp_kernel_op_28;
+             const real_t tmp_kernel_op_116 = tmp_kernel_op_107*tmp_kernel_op_111;
+             const real_t tmp_kernel_op_117 = tmp_kernel_op_113*tmp_kernel_op_114*1.0;
+             const real_t tmp_kernel_op_118 = -tmp_kernel_op_106*tmp_kernel_op_107*tmp_kernel_op_117 + tmp_kernel_op_112*tmp_kernel_op_38;
+             const real_t tmp_kernel_op_119 = 1.0 / (tmp_kernel_op_115*(tmp_kernel_op_109*tmp_kernel_op_117 + tmp_kernel_op_116*tmp_kernel_op_38) - tmp_kernel_op_118*(-tmp_kernel_op_106*tmp_kernel_op_107*tmp_kernel_op_113*tmp_kernel_op_114 - tmp_kernel_op_116*tmp_kernel_op_28));
+             const real_t tmp_kernel_op_120 = tmp_kernel_op_115*tmp_kernel_op_119;
+             const real_t tmp_kernel_op_121 = -tmp_kernel_op_118*tmp_kernel_op_119;
+             const real_t tmp_kernel_op_122 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_120 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_124 = tmp_kernel_op_122*(tmp_kernel_op_123 - 1.0);
+             const real_t tmp_kernel_op_125 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_52 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_53;
+             const real_t tmp_kernel_op_127 = tmp_kernel_op_125*(tmp_kernel_op_126 - 1.0);
+             const real_t tmp_kernel_op_128 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_86 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_130 = tmp_kernel_op_128*(tmp_kernel_op_129 - 1.0);
+             const real_t tmp_kernel_op_131 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_120 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_133 = tmp_kernel_op_131*(tmp_kernel_op_132 - 1.0);
+             const real_t tmp_kernel_op_134 = tmp_kernel_op_125*tmp_kernel_op_55;
+             const real_t tmp_kernel_op_135 = tmp_kernel_op_126*tmp_kernel_op_54;
+             const real_t tmp_kernel_op_136 = -tmp_kernel_op_134 - tmp_kernel_op_135;
+             const real_t tmp_kernel_op_137 = tmp_kernel_op_128*tmp_kernel_op_89;
+             const real_t tmp_kernel_op_138 = tmp_kernel_op_129*tmp_kernel_op_88;
+             const real_t tmp_kernel_op_139 = -tmp_kernel_op_137 - tmp_kernel_op_138;
+             const real_t tmp_kernel_op_140 = tmp_kernel_op_123*tmp_kernel_op_131;
+             const real_t tmp_kernel_op_141 = tmp_kernel_op_122*tmp_kernel_op_132;
+             const real_t tmp_kernel_op_142 = -tmp_kernel_op_140 - tmp_kernel_op_141;
+             const real_t tmp_kernel_op_143 = -tmp_kernel_op_125*(-tmp_kernel_op_55 - 1.333333333333333) + tmp_kernel_op_135;
+             const real_t tmp_kernel_op_144 = -tmp_kernel_op_128*(-tmp_kernel_op_89 + 2.666666666666667) + tmp_kernel_op_138;
+             const real_t tmp_kernel_op_145 = -tmp_kernel_op_131*(-tmp_kernel_op_123 + 2.666666666666667) + tmp_kernel_op_141;
+             const real_t tmp_kernel_op_146 = tmp_kernel_op_134 - tmp_kernel_op_54*(-tmp_kernel_op_126 + 2.666666666666667);
+             const real_t tmp_kernel_op_147 = tmp_kernel_op_137 - tmp_kernel_op_88*(-tmp_kernel_op_129 - 1.333333333333333);
+             const real_t tmp_kernel_op_148 = -tmp_kernel_op_122*(-tmp_kernel_op_132 + 2.666666666666667) + tmp_kernel_op_140;
+             const real_t tmp_kernel_op_150 = -tmp_kernel_op_125*tmp_kernel_op_149 - tmp_kernel_op_149*tmp_kernel_op_54;
+             const real_t tmp_kernel_op_152 = -tmp_kernel_op_128*tmp_kernel_op_151 - tmp_kernel_op_151*tmp_kernel_op_88;
+             const real_t tmp_kernel_op_154 = -tmp_kernel_op_122*tmp_kernel_op_153 - tmp_kernel_op_131*tmp_kernel_op_153;
+             const real_t tmp_kernel_op_155 = tmp_kernel_op_26*0.16666666666666666;
+             const real_t tmp_kernel_op_156 = tmp_kernel_op_70*0.66666666666666663;
+             const real_t tmp_kernel_op_157 = tmp_kernel_op_104*0.16666666666666666;
+             const real_t tmp_kernel_op_158 = tmp_kernel_op_26*0.66666666666666663;
+             const real_t tmp_kernel_op_159 = tmp_kernel_op_70*0.16666666666666666;
+             const real_t tmp_kernel_op_160 = tmp_kernel_op_104*0.16666666666666666;
+             const real_t elMatVec_0 = src_dof_0*(tmp_kernel_op_105*tmp_kernel_op_154 + tmp_kernel_op_150*tmp_kernel_op_27 + tmp_kernel_op_152*tmp_kernel_op_71) + src_dof_1*(-tmp_kernel_op_105*tmp_kernel_op_124 - tmp_kernel_op_27*tmp_kernel_op_56 - tmp_kernel_op_71*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_105*tmp_kernel_op_133 - tmp_kernel_op_127*tmp_kernel_op_27 - tmp_kernel_op_130*tmp_kernel_op_71) + src_dof_3*(tmp_kernel_op_105*tmp_kernel_op_142 + tmp_kernel_op_136*tmp_kernel_op_27 + tmp_kernel_op_139*tmp_kernel_op_71) + src_dof_4*(tmp_kernel_op_105*tmp_kernel_op_145 + tmp_kernel_op_143*tmp_kernel_op_27 + tmp_kernel_op_144*tmp_kernel_op_71) + src_dof_5*(tmp_kernel_op_105*tmp_kernel_op_148 + tmp_kernel_op_146*tmp_kernel_op_27 + tmp_kernel_op_147*tmp_kernel_op_71);
+             const real_t elMatVec_1 = src_dof_0*(tmp_kernel_op_150*tmp_kernel_op_155 + tmp_kernel_op_152*tmp_kernel_op_156 + tmp_kernel_op_154*tmp_kernel_op_157) + src_dof_1*(-tmp_kernel_op_124*tmp_kernel_op_157 - tmp_kernel_op_155*tmp_kernel_op_56 - tmp_kernel_op_156*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_127*tmp_kernel_op_155 - tmp_kernel_op_130*tmp_kernel_op_156 - tmp_kernel_op_133*tmp_kernel_op_157) + src_dof_3*(tmp_kernel_op_136*tmp_kernel_op_155 + tmp_kernel_op_139*tmp_kernel_op_156 + tmp_kernel_op_142*tmp_kernel_op_157) + src_dof_4*(tmp_kernel_op_143*tmp_kernel_op_155 + tmp_kernel_op_144*tmp_kernel_op_156 + tmp_kernel_op_145*tmp_kernel_op_157) + src_dof_5*(tmp_kernel_op_146*tmp_kernel_op_155 + tmp_kernel_op_147*tmp_kernel_op_156 + tmp_kernel_op_148*tmp_kernel_op_157);
+             const real_t elMatVec_2 = src_dof_0*(tmp_kernel_op_150*tmp_kernel_op_158 + tmp_kernel_op_152*tmp_kernel_op_159 + tmp_kernel_op_154*tmp_kernel_op_160) + src_dof_1*(-tmp_kernel_op_124*tmp_kernel_op_160 - tmp_kernel_op_158*tmp_kernel_op_56 - tmp_kernel_op_159*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_127*tmp_kernel_op_158 - tmp_kernel_op_130*tmp_kernel_op_159 - tmp_kernel_op_133*tmp_kernel_op_160) + src_dof_3*(tmp_kernel_op_136*tmp_kernel_op_158 + tmp_kernel_op_139*tmp_kernel_op_159 + tmp_kernel_op_142*tmp_kernel_op_160) + src_dof_4*(tmp_kernel_op_143*tmp_kernel_op_158 + tmp_kernel_op_144*tmp_kernel_op_159 + tmp_kernel_op_145*tmp_kernel_op_160) + src_dof_5*(tmp_kernel_op_146*tmp_kernel_op_158 + tmp_kernel_op_147*tmp_kernel_op_159 + tmp_kernel_op_148*tmp_kernel_op_160);
+             _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f8e490fa7899b12a0db438586bbe7947cfd25737
--- /dev/null
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_macro_2D.cpp
@@ -0,0 +1,576 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ToP1ElementwiseDivergenceAnnulusMap_0_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::toMatrix_macro_2D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_kernel_op_0 = 0.66666666666666663;
+       const real_t tmp_kernel_op_1 = 2.6666666666666665;
+       const real_t tmp_kernel_op_2 = tmp_kernel_op_0 + tmp_kernel_op_1 - 3.0;
+       const real_t tmp_kernel_op_3 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_kernel_op_4 = -tmp_kernel_op_3;
+       const real_t tmp_kernel_op_18 = rayVertex_0 - refVertex_0;
+       const real_t tmp_kernel_op_19 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_kernel_op_20 = -tmp_kernel_op_19;
+       const real_t tmp_kernel_op_21 = rayVertex_1 - refVertex_1;
+       const real_t tmp_kernel_op_22 = radRayVertex - radRefVertex;
+       const real_t tmp_kernel_op_23 = -tmp_kernel_op_22*1.0 / (-tmp_kernel_op_18*tmp_kernel_op_20 + tmp_kernel_op_21*tmp_kernel_op_4);
+       const real_t tmp_kernel_op_24 = tmp_kernel_op_23*1.0;
+       const real_t tmp_kernel_op_28 = -rayVertex_1;
+       const real_t tmp_kernel_op_29 = -rayVertex_0;
+       const real_t tmp_kernel_op_48 = tmp_kernel_op_22*1.0 / (tmp_kernel_op_18*tmp_kernel_op_19 - tmp_kernel_op_21*tmp_kernel_op_3);
+       const real_t tmp_kernel_op_49 = tmp_kernel_op_48*1.0;
+       const real_t tmp_kernel_op_60 = 2.6666666666666665;
+       const real_t tmp_kernel_op_61 = 0.66666666666666663;
+       const real_t tmp_kernel_op_62 = tmp_kernel_op_60 + tmp_kernel_op_61 - 3.0;
+       const real_t tmp_kernel_op_97 = 0.66666666666666663;
+       const real_t tmp_kernel_op_98 = 0.66666666666666663;
+       const real_t tmp_kernel_op_99 = tmp_kernel_op_97 + tmp_kernel_op_98 - 3.0;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t tmp_kernel_op_5 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_kernel_op_6 = -tmp_kernel_op_5;
+             const real_t tmp_kernel_op_7 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_kernel_op_8 = -tmp_kernel_op_7;
+             const real_t tmp_kernel_op_9 = p_affine_0_1 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+             const real_t tmp_kernel_op_10 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_kernel_op_11 = -tmp_kernel_op_10;
+             const real_t tmp_kernel_op_12 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_kernel_op_13 = -tmp_kernel_op_12;
+             const real_t tmp_kernel_op_14 = p_affine_0_0 + tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.66666666666666663;
+             const real_t tmp_kernel_op_15 = (tmp_kernel_op_14*tmp_kernel_op_14);
+             const real_t tmp_kernel_op_16 = (tmp_kernel_op_9*tmp_kernel_op_9);
+             const real_t tmp_kernel_op_17 = tmp_kernel_op_15 + tmp_kernel_op_16;
+             const real_t tmp_kernel_op_25 = pow(tmp_kernel_op_17, -0.50000000000000000)*tmp_kernel_op_24;
+             const real_t tmp_kernel_op_26 = tmp_kernel_op_25*tmp_kernel_op_9;
+             const real_t tmp_kernel_op_27 = pow(tmp_kernel_op_17, -1.5000000000000000);
+             const real_t tmp_kernel_op_30 = radRayVertex + tmp_kernel_op_23*(tmp_kernel_op_20*(tmp_kernel_op_14 + tmp_kernel_op_29) - tmp_kernel_op_4*(tmp_kernel_op_28 + tmp_kernel_op_9));
+             const real_t tmp_kernel_op_31 = tmp_kernel_op_15*tmp_kernel_op_27*tmp_kernel_op_30*1.0 - tmp_kernel_op_26*tmp_kernel_op_4;
+             const real_t tmp_kernel_op_32 = tmp_kernel_op_14*tmp_kernel_op_25;
+             const real_t tmp_kernel_op_33 = tmp_kernel_op_27*tmp_kernel_op_30*1.0;
+             const real_t tmp_kernel_op_34 = -tmp_kernel_op_14*tmp_kernel_op_33*tmp_kernel_op_9 + tmp_kernel_op_20*tmp_kernel_op_26;
+             const real_t tmp_kernel_op_35 = 1.0 / (tmp_kernel_op_31*(tmp_kernel_op_16*tmp_kernel_op_33 + tmp_kernel_op_20*tmp_kernel_op_32) - tmp_kernel_op_34*(-tmp_kernel_op_14*tmp_kernel_op_27*tmp_kernel_op_30*tmp_kernel_op_9 - tmp_kernel_op_32*tmp_kernel_op_4));
+             const real_t tmp_kernel_op_36 = tmp_kernel_op_31*tmp_kernel_op_35;
+             const real_t tmp_kernel_op_37 = -tmp_kernel_op_34*tmp_kernel_op_35;
+             const real_t tmp_kernel_op_38 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_36 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_37;
+             const real_t tmp_kernel_op_39 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_36 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_37;
+             const real_t tmp_kernel_op_40 = -tmp_kernel_op_2*tmp_kernel_op_38 - tmp_kernel_op_2*tmp_kernel_op_39;
+             const real_t tmp_kernel_op_41 = -p_affine_0_0;
+             const real_t tmp_kernel_op_42 = tmp_kernel_op_10*0.16666666666666666 + tmp_kernel_op_12*0.66666666666666663 + tmp_kernel_op_41;
+             const real_t tmp_kernel_op_43 = (tmp_kernel_op_42*tmp_kernel_op_42);
+             const real_t tmp_kernel_op_44 = -p_affine_0_1;
+             const real_t tmp_kernel_op_45 = tmp_kernel_op_44 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.66666666666666663;
+             const real_t tmp_kernel_op_46 = (tmp_kernel_op_45*tmp_kernel_op_45);
+             const real_t tmp_kernel_op_47 = tmp_kernel_op_43 + tmp_kernel_op_46;
+             const real_t tmp_kernel_op_50 = pow(tmp_kernel_op_47, -0.50000000000000000)*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_51 = tmp_kernel_op_42*tmp_kernel_op_50;
+             const real_t tmp_kernel_op_52 = -tmp_kernel_op_19*(rayVertex_0 + tmp_kernel_op_42) + tmp_kernel_op_3*(rayVertex_1 + tmp_kernel_op_45);
+             const real_t tmp_kernel_op_53 = pow(tmp_kernel_op_47, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_54 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+             const real_t tmp_kernel_op_55 = tmp_kernel_op_45*tmp_kernel_op_50;
+             const real_t tmp_kernel_op_56 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+             const real_t tmp_kernel_op_57 = tmp_kernel_op_42*tmp_kernel_op_45;
+             const real_t tmp_kernel_op_58 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_51 - tmp_kernel_op_46*tmp_kernel_op_54)*(tmp_kernel_op_3*tmp_kernel_op_55 + tmp_kernel_op_43*tmp_kernel_op_56) - (tmp_kernel_op_19*tmp_kernel_op_55 + tmp_kernel_op_54*tmp_kernel_op_57)*(tmp_kernel_op_3*tmp_kernel_op_51 - tmp_kernel_op_56*tmp_kernel_op_57));
+             const real_t tmp_kernel_op_59 = tmp_kernel_op_58*0.16666666666666674;
+             const real_t tmp_kernel_op_63 = p_affine_0_1 + tmp_kernel_op_6*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_64 = p_affine_0_0 + tmp_kernel_op_11*0.66666666666666663 + tmp_kernel_op_13*0.16666666666666666;
+             const real_t tmp_kernel_op_65 = (tmp_kernel_op_64*tmp_kernel_op_64);
+             const real_t tmp_kernel_op_66 = (tmp_kernel_op_63*tmp_kernel_op_63);
+             const real_t tmp_kernel_op_67 = tmp_kernel_op_65 + tmp_kernel_op_66;
+             const real_t tmp_kernel_op_68 = tmp_kernel_op_24*pow(tmp_kernel_op_67, -0.50000000000000000);
+             const real_t tmp_kernel_op_69 = tmp_kernel_op_63*tmp_kernel_op_68;
+             const real_t tmp_kernel_op_70 = pow(tmp_kernel_op_67, -1.5000000000000000);
+             const real_t tmp_kernel_op_71 = radRayVertex + tmp_kernel_op_23*(tmp_kernel_op_20*(tmp_kernel_op_29 + tmp_kernel_op_64) - tmp_kernel_op_4*(tmp_kernel_op_28 + tmp_kernel_op_63));
+             const real_t tmp_kernel_op_72 = -tmp_kernel_op_4*tmp_kernel_op_69 + tmp_kernel_op_65*tmp_kernel_op_70*tmp_kernel_op_71*1.0;
+             const real_t tmp_kernel_op_73 = tmp_kernel_op_64*tmp_kernel_op_68;
+             const real_t tmp_kernel_op_74 = tmp_kernel_op_70*tmp_kernel_op_71*1.0;
+             const real_t tmp_kernel_op_75 = tmp_kernel_op_20*tmp_kernel_op_69 - tmp_kernel_op_63*tmp_kernel_op_64*tmp_kernel_op_74;
+             const real_t tmp_kernel_op_76 = 1.0 / (tmp_kernel_op_72*(tmp_kernel_op_20*tmp_kernel_op_73 + tmp_kernel_op_66*tmp_kernel_op_74) - tmp_kernel_op_75*(-tmp_kernel_op_4*tmp_kernel_op_73 - tmp_kernel_op_63*tmp_kernel_op_64*tmp_kernel_op_70*tmp_kernel_op_71));
+             const real_t tmp_kernel_op_77 = tmp_kernel_op_72*tmp_kernel_op_76;
+             const real_t tmp_kernel_op_78 = -tmp_kernel_op_75*tmp_kernel_op_76;
+             const real_t tmp_kernel_op_79 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_77 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_78;
+             const real_t tmp_kernel_op_80 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_77 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_78;
+             const real_t tmp_kernel_op_81 = -tmp_kernel_op_62*tmp_kernel_op_79 - tmp_kernel_op_62*tmp_kernel_op_80;
+             const real_t tmp_kernel_op_82 = tmp_kernel_op_10*0.66666666666666663 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_41;
+             const real_t tmp_kernel_op_83 = (tmp_kernel_op_82*tmp_kernel_op_82);
+             const real_t tmp_kernel_op_84 = tmp_kernel_op_44 + tmp_kernel_op_5*0.66666666666666663 + tmp_kernel_op_7*0.16666666666666666;
+             const real_t tmp_kernel_op_85 = (tmp_kernel_op_84*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_86 = tmp_kernel_op_83 + tmp_kernel_op_85;
+             const real_t tmp_kernel_op_87 = tmp_kernel_op_49*pow(tmp_kernel_op_86, -0.50000000000000000);
+             const real_t tmp_kernel_op_88 = tmp_kernel_op_82*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_89 = -tmp_kernel_op_19*(rayVertex_0 + tmp_kernel_op_82) + tmp_kernel_op_3*(rayVertex_1 + tmp_kernel_op_84);
+             const real_t tmp_kernel_op_90 = pow(tmp_kernel_op_86, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_91 = tmp_kernel_op_90*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_89);
+             const real_t tmp_kernel_op_92 = tmp_kernel_op_84*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_93 = tmp_kernel_op_90*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_89);
+             const real_t tmp_kernel_op_94 = tmp_kernel_op_82*tmp_kernel_op_84;
+             const real_t tmp_kernel_op_95 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_88 - tmp_kernel_op_85*tmp_kernel_op_91)*(tmp_kernel_op_3*tmp_kernel_op_92 + tmp_kernel_op_83*tmp_kernel_op_93) - (tmp_kernel_op_19*tmp_kernel_op_92 + tmp_kernel_op_91*tmp_kernel_op_94)*(tmp_kernel_op_3*tmp_kernel_op_88 - tmp_kernel_op_93*tmp_kernel_op_94));
+             const real_t tmp_kernel_op_96 = tmp_kernel_op_95*0.16666666666666671;
+             const real_t tmp_kernel_op_100 = p_affine_0_1 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_101 = p_affine_0_0 + tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.16666666666666666;
+             const real_t tmp_kernel_op_102 = (tmp_kernel_op_101*tmp_kernel_op_101);
+             const real_t tmp_kernel_op_103 = (tmp_kernel_op_100*tmp_kernel_op_100);
+             const real_t tmp_kernel_op_104 = tmp_kernel_op_102 + tmp_kernel_op_103;
+             const real_t tmp_kernel_op_105 = pow(tmp_kernel_op_104, -0.50000000000000000)*tmp_kernel_op_24;
+             const real_t tmp_kernel_op_106 = tmp_kernel_op_100*tmp_kernel_op_105;
+             const real_t tmp_kernel_op_107 = pow(tmp_kernel_op_104, -1.5000000000000000);
+             const real_t tmp_kernel_op_108 = radRayVertex + tmp_kernel_op_23*(tmp_kernel_op_20*(tmp_kernel_op_101 + tmp_kernel_op_29) - tmp_kernel_op_4*(tmp_kernel_op_100 + tmp_kernel_op_28));
+             const real_t tmp_kernel_op_109 = tmp_kernel_op_102*tmp_kernel_op_107*tmp_kernel_op_108*1.0 - tmp_kernel_op_106*tmp_kernel_op_4;
+             const real_t tmp_kernel_op_110 = tmp_kernel_op_101*tmp_kernel_op_105;
+             const real_t tmp_kernel_op_111 = tmp_kernel_op_107*tmp_kernel_op_108*1.0;
+             const real_t tmp_kernel_op_112 = -tmp_kernel_op_100*tmp_kernel_op_101*tmp_kernel_op_111 + tmp_kernel_op_106*tmp_kernel_op_20;
+             const real_t tmp_kernel_op_113 = 1.0 / (tmp_kernel_op_109*(tmp_kernel_op_103*tmp_kernel_op_111 + tmp_kernel_op_110*tmp_kernel_op_20) - tmp_kernel_op_112*(-tmp_kernel_op_100*tmp_kernel_op_101*tmp_kernel_op_107*tmp_kernel_op_108 - tmp_kernel_op_110*tmp_kernel_op_4));
+             const real_t tmp_kernel_op_114 = tmp_kernel_op_109*tmp_kernel_op_113;
+             const real_t tmp_kernel_op_115 = -tmp_kernel_op_112*tmp_kernel_op_113;
+             const real_t tmp_kernel_op_116 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_114 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_115;
+             const real_t tmp_kernel_op_117 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_114 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_115;
+             const real_t tmp_kernel_op_118 = -tmp_kernel_op_116*tmp_kernel_op_99 - tmp_kernel_op_117*tmp_kernel_op_99;
+             const real_t tmp_kernel_op_119 = tmp_kernel_op_10*0.16666666666666666 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_41;
+             const real_t tmp_kernel_op_120 = (tmp_kernel_op_119*tmp_kernel_op_119);
+             const real_t tmp_kernel_op_121 = tmp_kernel_op_44 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.16666666666666666;
+             const real_t tmp_kernel_op_122 = (tmp_kernel_op_121*tmp_kernel_op_121);
+             const real_t tmp_kernel_op_123 = tmp_kernel_op_120 + tmp_kernel_op_122;
+             const real_t tmp_kernel_op_124 = pow(tmp_kernel_op_123, -0.50000000000000000)*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_125 = tmp_kernel_op_119*tmp_kernel_op_124;
+             const real_t tmp_kernel_op_126 = -tmp_kernel_op_19*(rayVertex_0 + tmp_kernel_op_119) + tmp_kernel_op_3*(rayVertex_1 + tmp_kernel_op_121);
+             const real_t tmp_kernel_op_127 = pow(tmp_kernel_op_123, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_128 = tmp_kernel_op_127*(radRayVertex + tmp_kernel_op_126*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_129 = tmp_kernel_op_121*tmp_kernel_op_124;
+             const real_t tmp_kernel_op_130 = tmp_kernel_op_127*(radRayVertex + tmp_kernel_op_126*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_131 = tmp_kernel_op_119*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_132 = abs_det_jac_affine_GRAY*0.16666666666666666*abs(-(tmp_kernel_op_120*tmp_kernel_op_130 + tmp_kernel_op_129*tmp_kernel_op_3)*(-tmp_kernel_op_122*tmp_kernel_op_128 + tmp_kernel_op_125*tmp_kernel_op_19) + (tmp_kernel_op_125*tmp_kernel_op_3 - tmp_kernel_op_130*tmp_kernel_op_131)*(tmp_kernel_op_128*tmp_kernel_op_131 + tmp_kernel_op_129*tmp_kernel_op_19));
+             const real_t tmp_kernel_op_133 = tmp_kernel_op_132*0.66666666666666674;
+             const real_t tmp_kernel_op_134 = tmp_kernel_op_38*(tmp_kernel_op_0 - 1.0);
+             const real_t tmp_kernel_op_135 = tmp_kernel_op_79*(tmp_kernel_op_60 - 1.0);
+             const real_t tmp_kernel_op_136 = tmp_kernel_op_116*(tmp_kernel_op_97 - 1.0);
+             const real_t tmp_kernel_op_137 = tmp_kernel_op_39*(tmp_kernel_op_1 - 1.0);
+             const real_t tmp_kernel_op_138 = tmp_kernel_op_80*(tmp_kernel_op_61 - 1.0);
+             const real_t tmp_kernel_op_139 = tmp_kernel_op_117*(tmp_kernel_op_98 - 1.0);
+             const real_t tmp_kernel_op_140 = tmp_kernel_op_0*tmp_kernel_op_39;
+             const real_t tmp_kernel_op_141 = tmp_kernel_op_1*tmp_kernel_op_38;
+             const real_t tmp_kernel_op_142 = -tmp_kernel_op_140 - tmp_kernel_op_141;
+             const real_t tmp_kernel_op_143 = tmp_kernel_op_60*tmp_kernel_op_80;
+             const real_t tmp_kernel_op_144 = tmp_kernel_op_61*tmp_kernel_op_79;
+             const real_t tmp_kernel_op_145 = -tmp_kernel_op_143 - tmp_kernel_op_144;
+             const real_t tmp_kernel_op_146 = tmp_kernel_op_117*tmp_kernel_op_97;
+             const real_t tmp_kernel_op_147 = tmp_kernel_op_116*tmp_kernel_op_98;
+             const real_t tmp_kernel_op_148 = -tmp_kernel_op_146 - tmp_kernel_op_147;
+             const real_t tmp_kernel_op_149 = tmp_kernel_op_141 - tmp_kernel_op_39*(-tmp_kernel_op_0 - 1.333333333333333);
+             const real_t tmp_kernel_op_150 = tmp_kernel_op_144 - tmp_kernel_op_80*(-tmp_kernel_op_60 + 2.666666666666667);
+             const real_t tmp_kernel_op_151 = -tmp_kernel_op_117*(-tmp_kernel_op_97 + 2.666666666666667) + tmp_kernel_op_147;
+             const real_t tmp_kernel_op_152 = tmp_kernel_op_140 - tmp_kernel_op_38*(-tmp_kernel_op_1 + 2.666666666666667);
+             const real_t tmp_kernel_op_153 = tmp_kernel_op_143 - tmp_kernel_op_79*(-tmp_kernel_op_61 - 1.333333333333333);
+             const real_t tmp_kernel_op_154 = -tmp_kernel_op_116*(-tmp_kernel_op_98 + 2.666666666666667) + tmp_kernel_op_146;
+             const real_t tmp_kernel_op_155 = tmp_kernel_op_58*0.16666666666666666;
+             const real_t tmp_kernel_op_156 = tmp_kernel_op_95*0.66666666666666663;
+             const real_t tmp_kernel_op_157 = tmp_kernel_op_132*0.16666666666666666;
+             const real_t tmp_kernel_op_158 = tmp_kernel_op_58*0.66666666666666663;
+             const real_t tmp_kernel_op_159 = tmp_kernel_op_95*0.16666666666666666;
+             const real_t tmp_kernel_op_160 = tmp_kernel_op_132*0.16666666666666666;
+             const real_t elMat_0_0 = tmp_kernel_op_118*tmp_kernel_op_133 + tmp_kernel_op_40*tmp_kernel_op_59 + tmp_kernel_op_81*tmp_kernel_op_96;
+             const real_t elMat_0_1 = -tmp_kernel_op_133*tmp_kernel_op_136 - tmp_kernel_op_134*tmp_kernel_op_59 - tmp_kernel_op_135*tmp_kernel_op_96;
+             const real_t elMat_0_2 = -tmp_kernel_op_133*tmp_kernel_op_139 - tmp_kernel_op_137*tmp_kernel_op_59 - tmp_kernel_op_138*tmp_kernel_op_96;
+             const real_t elMat_0_3 = tmp_kernel_op_133*tmp_kernel_op_148 + tmp_kernel_op_142*tmp_kernel_op_59 + tmp_kernel_op_145*tmp_kernel_op_96;
+             const real_t elMat_0_4 = tmp_kernel_op_133*tmp_kernel_op_151 + tmp_kernel_op_149*tmp_kernel_op_59 + tmp_kernel_op_150*tmp_kernel_op_96;
+             const real_t elMat_0_5 = tmp_kernel_op_133*tmp_kernel_op_154 + tmp_kernel_op_152*tmp_kernel_op_59 + tmp_kernel_op_153*tmp_kernel_op_96;
+             const real_t elMat_1_0 = tmp_kernel_op_118*tmp_kernel_op_157 + tmp_kernel_op_155*tmp_kernel_op_40 + tmp_kernel_op_156*tmp_kernel_op_81;
+             const real_t elMat_1_1 = -tmp_kernel_op_134*tmp_kernel_op_155 - tmp_kernel_op_135*tmp_kernel_op_156 - tmp_kernel_op_136*tmp_kernel_op_157;
+             const real_t elMat_1_2 = -tmp_kernel_op_137*tmp_kernel_op_155 - tmp_kernel_op_138*tmp_kernel_op_156 - tmp_kernel_op_139*tmp_kernel_op_157;
+             const real_t elMat_1_3 = tmp_kernel_op_142*tmp_kernel_op_155 + tmp_kernel_op_145*tmp_kernel_op_156 + tmp_kernel_op_148*tmp_kernel_op_157;
+             const real_t elMat_1_4 = tmp_kernel_op_149*tmp_kernel_op_155 + tmp_kernel_op_150*tmp_kernel_op_156 + tmp_kernel_op_151*tmp_kernel_op_157;
+             const real_t elMat_1_5 = tmp_kernel_op_152*tmp_kernel_op_155 + tmp_kernel_op_153*tmp_kernel_op_156 + tmp_kernel_op_154*tmp_kernel_op_157;
+             const real_t elMat_2_0 = tmp_kernel_op_118*tmp_kernel_op_160 + tmp_kernel_op_158*tmp_kernel_op_40 + tmp_kernel_op_159*tmp_kernel_op_81;
+             const real_t elMat_2_1 = -tmp_kernel_op_134*tmp_kernel_op_158 - tmp_kernel_op_135*tmp_kernel_op_159 - tmp_kernel_op_136*tmp_kernel_op_160;
+             const real_t elMat_2_2 = -tmp_kernel_op_137*tmp_kernel_op_158 - tmp_kernel_op_138*tmp_kernel_op_159 - tmp_kernel_op_139*tmp_kernel_op_160;
+             const real_t elMat_2_3 = tmp_kernel_op_142*tmp_kernel_op_158 + tmp_kernel_op_145*tmp_kernel_op_159 + tmp_kernel_op_148*tmp_kernel_op_160;
+             const real_t elMat_2_4 = tmp_kernel_op_149*tmp_kernel_op_158 + tmp_kernel_op_150*tmp_kernel_op_159 + tmp_kernel_op_151*tmp_kernel_op_160;
+             const real_t elMat_2_5 = tmp_kernel_op_152*tmp_kernel_op_158 + tmp_kernel_op_153*tmp_kernel_op_159 + tmp_kernel_op_154*tmp_kernel_op_160;
+         
+             std::vector< uint_t > _data_rowIdx( 3 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 18 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t tmp_kernel_op_5 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_kernel_op_6 = -tmp_kernel_op_5;
+             const real_t tmp_kernel_op_7 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_kernel_op_8 = -tmp_kernel_op_7;
+             const real_t tmp_kernel_op_9 = p_affine_0_1 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+             const real_t tmp_kernel_op_10 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_kernel_op_11 = -tmp_kernel_op_10;
+             const real_t tmp_kernel_op_12 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_kernel_op_13 = -tmp_kernel_op_12;
+             const real_t tmp_kernel_op_14 = p_affine_0_0 + tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.66666666666666663;
+             const real_t tmp_kernel_op_15 = (tmp_kernel_op_14*tmp_kernel_op_14);
+             const real_t tmp_kernel_op_16 = (tmp_kernel_op_9*tmp_kernel_op_9);
+             const real_t tmp_kernel_op_17 = tmp_kernel_op_15 + tmp_kernel_op_16;
+             const real_t tmp_kernel_op_25 = pow(tmp_kernel_op_17, -0.50000000000000000)*tmp_kernel_op_24;
+             const real_t tmp_kernel_op_26 = tmp_kernel_op_25*tmp_kernel_op_9;
+             const real_t tmp_kernel_op_27 = pow(tmp_kernel_op_17, -1.5000000000000000);
+             const real_t tmp_kernel_op_30 = radRayVertex + tmp_kernel_op_23*(tmp_kernel_op_20*(tmp_kernel_op_14 + tmp_kernel_op_29) - tmp_kernel_op_4*(tmp_kernel_op_28 + tmp_kernel_op_9));
+             const real_t tmp_kernel_op_31 = tmp_kernel_op_15*tmp_kernel_op_27*tmp_kernel_op_30*1.0 - tmp_kernel_op_26*tmp_kernel_op_4;
+             const real_t tmp_kernel_op_32 = tmp_kernel_op_14*tmp_kernel_op_25;
+             const real_t tmp_kernel_op_33 = tmp_kernel_op_27*tmp_kernel_op_30*1.0;
+             const real_t tmp_kernel_op_34 = -tmp_kernel_op_14*tmp_kernel_op_33*tmp_kernel_op_9 + tmp_kernel_op_20*tmp_kernel_op_26;
+             const real_t tmp_kernel_op_35 = 1.0 / (tmp_kernel_op_31*(tmp_kernel_op_16*tmp_kernel_op_33 + tmp_kernel_op_20*tmp_kernel_op_32) - tmp_kernel_op_34*(-tmp_kernel_op_14*tmp_kernel_op_27*tmp_kernel_op_30*tmp_kernel_op_9 - tmp_kernel_op_32*tmp_kernel_op_4));
+             const real_t tmp_kernel_op_36 = tmp_kernel_op_31*tmp_kernel_op_35;
+             const real_t tmp_kernel_op_37 = -tmp_kernel_op_34*tmp_kernel_op_35;
+             const real_t tmp_kernel_op_38 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_36 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_37;
+             const real_t tmp_kernel_op_39 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_36 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_37;
+             const real_t tmp_kernel_op_40 = -tmp_kernel_op_2*tmp_kernel_op_38 - tmp_kernel_op_2*tmp_kernel_op_39;
+             const real_t tmp_kernel_op_41 = -p_affine_0_0;
+             const real_t tmp_kernel_op_42 = tmp_kernel_op_10*0.16666666666666666 + tmp_kernel_op_12*0.66666666666666663 + tmp_kernel_op_41;
+             const real_t tmp_kernel_op_43 = (tmp_kernel_op_42*tmp_kernel_op_42);
+             const real_t tmp_kernel_op_44 = -p_affine_0_1;
+             const real_t tmp_kernel_op_45 = tmp_kernel_op_44 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.66666666666666663;
+             const real_t tmp_kernel_op_46 = (tmp_kernel_op_45*tmp_kernel_op_45);
+             const real_t tmp_kernel_op_47 = tmp_kernel_op_43 + tmp_kernel_op_46;
+             const real_t tmp_kernel_op_50 = pow(tmp_kernel_op_47, -0.50000000000000000)*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_51 = tmp_kernel_op_42*tmp_kernel_op_50;
+             const real_t tmp_kernel_op_52 = -tmp_kernel_op_19*(rayVertex_0 + tmp_kernel_op_42) + tmp_kernel_op_3*(rayVertex_1 + tmp_kernel_op_45);
+             const real_t tmp_kernel_op_53 = pow(tmp_kernel_op_47, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_54 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+             const real_t tmp_kernel_op_55 = tmp_kernel_op_45*tmp_kernel_op_50;
+             const real_t tmp_kernel_op_56 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+             const real_t tmp_kernel_op_57 = tmp_kernel_op_42*tmp_kernel_op_45;
+             const real_t tmp_kernel_op_58 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_51 - tmp_kernel_op_46*tmp_kernel_op_54)*(tmp_kernel_op_3*tmp_kernel_op_55 + tmp_kernel_op_43*tmp_kernel_op_56) - (tmp_kernel_op_19*tmp_kernel_op_55 + tmp_kernel_op_54*tmp_kernel_op_57)*(tmp_kernel_op_3*tmp_kernel_op_51 - tmp_kernel_op_56*tmp_kernel_op_57));
+             const real_t tmp_kernel_op_59 = tmp_kernel_op_58*0.16666666666666674;
+             const real_t tmp_kernel_op_63 = p_affine_0_1 + tmp_kernel_op_6*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_64 = p_affine_0_0 + tmp_kernel_op_11*0.66666666666666663 + tmp_kernel_op_13*0.16666666666666666;
+             const real_t tmp_kernel_op_65 = (tmp_kernel_op_64*tmp_kernel_op_64);
+             const real_t tmp_kernel_op_66 = (tmp_kernel_op_63*tmp_kernel_op_63);
+             const real_t tmp_kernel_op_67 = tmp_kernel_op_65 + tmp_kernel_op_66;
+             const real_t tmp_kernel_op_68 = tmp_kernel_op_24*pow(tmp_kernel_op_67, -0.50000000000000000);
+             const real_t tmp_kernel_op_69 = tmp_kernel_op_63*tmp_kernel_op_68;
+             const real_t tmp_kernel_op_70 = pow(tmp_kernel_op_67, -1.5000000000000000);
+             const real_t tmp_kernel_op_71 = radRayVertex + tmp_kernel_op_23*(tmp_kernel_op_20*(tmp_kernel_op_29 + tmp_kernel_op_64) - tmp_kernel_op_4*(tmp_kernel_op_28 + tmp_kernel_op_63));
+             const real_t tmp_kernel_op_72 = -tmp_kernel_op_4*tmp_kernel_op_69 + tmp_kernel_op_65*tmp_kernel_op_70*tmp_kernel_op_71*1.0;
+             const real_t tmp_kernel_op_73 = tmp_kernel_op_64*tmp_kernel_op_68;
+             const real_t tmp_kernel_op_74 = tmp_kernel_op_70*tmp_kernel_op_71*1.0;
+             const real_t tmp_kernel_op_75 = tmp_kernel_op_20*tmp_kernel_op_69 - tmp_kernel_op_63*tmp_kernel_op_64*tmp_kernel_op_74;
+             const real_t tmp_kernel_op_76 = 1.0 / (tmp_kernel_op_72*(tmp_kernel_op_20*tmp_kernel_op_73 + tmp_kernel_op_66*tmp_kernel_op_74) - tmp_kernel_op_75*(-tmp_kernel_op_4*tmp_kernel_op_73 - tmp_kernel_op_63*tmp_kernel_op_64*tmp_kernel_op_70*tmp_kernel_op_71));
+             const real_t tmp_kernel_op_77 = tmp_kernel_op_72*tmp_kernel_op_76;
+             const real_t tmp_kernel_op_78 = -tmp_kernel_op_75*tmp_kernel_op_76;
+             const real_t tmp_kernel_op_79 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_77 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_78;
+             const real_t tmp_kernel_op_80 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_77 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_78;
+             const real_t tmp_kernel_op_81 = -tmp_kernel_op_62*tmp_kernel_op_79 - tmp_kernel_op_62*tmp_kernel_op_80;
+             const real_t tmp_kernel_op_82 = tmp_kernel_op_10*0.66666666666666663 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_41;
+             const real_t tmp_kernel_op_83 = (tmp_kernel_op_82*tmp_kernel_op_82);
+             const real_t tmp_kernel_op_84 = tmp_kernel_op_44 + tmp_kernel_op_5*0.66666666666666663 + tmp_kernel_op_7*0.16666666666666666;
+             const real_t tmp_kernel_op_85 = (tmp_kernel_op_84*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_86 = tmp_kernel_op_83 + tmp_kernel_op_85;
+             const real_t tmp_kernel_op_87 = tmp_kernel_op_49*pow(tmp_kernel_op_86, -0.50000000000000000);
+             const real_t tmp_kernel_op_88 = tmp_kernel_op_82*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_89 = -tmp_kernel_op_19*(rayVertex_0 + tmp_kernel_op_82) + tmp_kernel_op_3*(rayVertex_1 + tmp_kernel_op_84);
+             const real_t tmp_kernel_op_90 = pow(tmp_kernel_op_86, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_91 = tmp_kernel_op_90*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_89);
+             const real_t tmp_kernel_op_92 = tmp_kernel_op_84*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_93 = tmp_kernel_op_90*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_89);
+             const real_t tmp_kernel_op_94 = tmp_kernel_op_82*tmp_kernel_op_84;
+             const real_t tmp_kernel_op_95 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_88 - tmp_kernel_op_85*tmp_kernel_op_91)*(tmp_kernel_op_3*tmp_kernel_op_92 + tmp_kernel_op_83*tmp_kernel_op_93) - (tmp_kernel_op_19*tmp_kernel_op_92 + tmp_kernel_op_91*tmp_kernel_op_94)*(tmp_kernel_op_3*tmp_kernel_op_88 - tmp_kernel_op_93*tmp_kernel_op_94));
+             const real_t tmp_kernel_op_96 = tmp_kernel_op_95*0.16666666666666671;
+             const real_t tmp_kernel_op_100 = p_affine_0_1 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_101 = p_affine_0_0 + tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.16666666666666666;
+             const real_t tmp_kernel_op_102 = (tmp_kernel_op_101*tmp_kernel_op_101);
+             const real_t tmp_kernel_op_103 = (tmp_kernel_op_100*tmp_kernel_op_100);
+             const real_t tmp_kernel_op_104 = tmp_kernel_op_102 + tmp_kernel_op_103;
+             const real_t tmp_kernel_op_105 = pow(tmp_kernel_op_104, -0.50000000000000000)*tmp_kernel_op_24;
+             const real_t tmp_kernel_op_106 = tmp_kernel_op_100*tmp_kernel_op_105;
+             const real_t tmp_kernel_op_107 = pow(tmp_kernel_op_104, -1.5000000000000000);
+             const real_t tmp_kernel_op_108 = radRayVertex + tmp_kernel_op_23*(tmp_kernel_op_20*(tmp_kernel_op_101 + tmp_kernel_op_29) - tmp_kernel_op_4*(tmp_kernel_op_100 + tmp_kernel_op_28));
+             const real_t tmp_kernel_op_109 = tmp_kernel_op_102*tmp_kernel_op_107*tmp_kernel_op_108*1.0 - tmp_kernel_op_106*tmp_kernel_op_4;
+             const real_t tmp_kernel_op_110 = tmp_kernel_op_101*tmp_kernel_op_105;
+             const real_t tmp_kernel_op_111 = tmp_kernel_op_107*tmp_kernel_op_108*1.0;
+             const real_t tmp_kernel_op_112 = -tmp_kernel_op_100*tmp_kernel_op_101*tmp_kernel_op_111 + tmp_kernel_op_106*tmp_kernel_op_20;
+             const real_t tmp_kernel_op_113 = 1.0 / (tmp_kernel_op_109*(tmp_kernel_op_103*tmp_kernel_op_111 + tmp_kernel_op_110*tmp_kernel_op_20) - tmp_kernel_op_112*(-tmp_kernel_op_100*tmp_kernel_op_101*tmp_kernel_op_107*tmp_kernel_op_108 - tmp_kernel_op_110*tmp_kernel_op_4));
+             const real_t tmp_kernel_op_114 = tmp_kernel_op_109*tmp_kernel_op_113;
+             const real_t tmp_kernel_op_115 = -tmp_kernel_op_112*tmp_kernel_op_113;
+             const real_t tmp_kernel_op_116 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_114 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_115;
+             const real_t tmp_kernel_op_117 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_114 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_115;
+             const real_t tmp_kernel_op_118 = -tmp_kernel_op_116*tmp_kernel_op_99 - tmp_kernel_op_117*tmp_kernel_op_99;
+             const real_t tmp_kernel_op_119 = tmp_kernel_op_10*0.16666666666666666 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_41;
+             const real_t tmp_kernel_op_120 = (tmp_kernel_op_119*tmp_kernel_op_119);
+             const real_t tmp_kernel_op_121 = tmp_kernel_op_44 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.16666666666666666;
+             const real_t tmp_kernel_op_122 = (tmp_kernel_op_121*tmp_kernel_op_121);
+             const real_t tmp_kernel_op_123 = tmp_kernel_op_120 + tmp_kernel_op_122;
+             const real_t tmp_kernel_op_124 = pow(tmp_kernel_op_123, -0.50000000000000000)*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_125 = tmp_kernel_op_119*tmp_kernel_op_124;
+             const real_t tmp_kernel_op_126 = -tmp_kernel_op_19*(rayVertex_0 + tmp_kernel_op_119) + tmp_kernel_op_3*(rayVertex_1 + tmp_kernel_op_121);
+             const real_t tmp_kernel_op_127 = pow(tmp_kernel_op_123, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_128 = tmp_kernel_op_127*(radRayVertex + tmp_kernel_op_126*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_129 = tmp_kernel_op_121*tmp_kernel_op_124;
+             const real_t tmp_kernel_op_130 = tmp_kernel_op_127*(radRayVertex + tmp_kernel_op_126*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_131 = tmp_kernel_op_119*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_132 = abs_det_jac_affine_BLUE*0.16666666666666666*abs(-(tmp_kernel_op_120*tmp_kernel_op_130 + tmp_kernel_op_129*tmp_kernel_op_3)*(-tmp_kernel_op_122*tmp_kernel_op_128 + tmp_kernel_op_125*tmp_kernel_op_19) + (tmp_kernel_op_125*tmp_kernel_op_3 - tmp_kernel_op_130*tmp_kernel_op_131)*(tmp_kernel_op_128*tmp_kernel_op_131 + tmp_kernel_op_129*tmp_kernel_op_19));
+             const real_t tmp_kernel_op_133 = tmp_kernel_op_132*0.66666666666666674;
+             const real_t tmp_kernel_op_134 = tmp_kernel_op_38*(tmp_kernel_op_0 - 1.0);
+             const real_t tmp_kernel_op_135 = tmp_kernel_op_79*(tmp_kernel_op_60 - 1.0);
+             const real_t tmp_kernel_op_136 = tmp_kernel_op_116*(tmp_kernel_op_97 - 1.0);
+             const real_t tmp_kernel_op_137 = tmp_kernel_op_39*(tmp_kernel_op_1 - 1.0);
+             const real_t tmp_kernel_op_138 = tmp_kernel_op_80*(tmp_kernel_op_61 - 1.0);
+             const real_t tmp_kernel_op_139 = tmp_kernel_op_117*(tmp_kernel_op_98 - 1.0);
+             const real_t tmp_kernel_op_140 = tmp_kernel_op_0*tmp_kernel_op_39;
+             const real_t tmp_kernel_op_141 = tmp_kernel_op_1*tmp_kernel_op_38;
+             const real_t tmp_kernel_op_142 = -tmp_kernel_op_140 - tmp_kernel_op_141;
+             const real_t tmp_kernel_op_143 = tmp_kernel_op_60*tmp_kernel_op_80;
+             const real_t tmp_kernel_op_144 = tmp_kernel_op_61*tmp_kernel_op_79;
+             const real_t tmp_kernel_op_145 = -tmp_kernel_op_143 - tmp_kernel_op_144;
+             const real_t tmp_kernel_op_146 = tmp_kernel_op_117*tmp_kernel_op_97;
+             const real_t tmp_kernel_op_147 = tmp_kernel_op_116*tmp_kernel_op_98;
+             const real_t tmp_kernel_op_148 = -tmp_kernel_op_146 - tmp_kernel_op_147;
+             const real_t tmp_kernel_op_149 = tmp_kernel_op_141 - tmp_kernel_op_39*(-tmp_kernel_op_0 - 1.333333333333333);
+             const real_t tmp_kernel_op_150 = tmp_kernel_op_144 - tmp_kernel_op_80*(-tmp_kernel_op_60 + 2.666666666666667);
+             const real_t tmp_kernel_op_151 = -tmp_kernel_op_117*(-tmp_kernel_op_97 + 2.666666666666667) + tmp_kernel_op_147;
+             const real_t tmp_kernel_op_152 = tmp_kernel_op_140 - tmp_kernel_op_38*(-tmp_kernel_op_1 + 2.666666666666667);
+             const real_t tmp_kernel_op_153 = tmp_kernel_op_143 - tmp_kernel_op_79*(-tmp_kernel_op_61 - 1.333333333333333);
+             const real_t tmp_kernel_op_154 = -tmp_kernel_op_116*(-tmp_kernel_op_98 + 2.666666666666667) + tmp_kernel_op_146;
+             const real_t tmp_kernel_op_155 = tmp_kernel_op_58*0.16666666666666666;
+             const real_t tmp_kernel_op_156 = tmp_kernel_op_95*0.66666666666666663;
+             const real_t tmp_kernel_op_157 = tmp_kernel_op_132*0.16666666666666666;
+             const real_t tmp_kernel_op_158 = tmp_kernel_op_58*0.66666666666666663;
+             const real_t tmp_kernel_op_159 = tmp_kernel_op_95*0.16666666666666666;
+             const real_t tmp_kernel_op_160 = tmp_kernel_op_132*0.16666666666666666;
+             const real_t elMat_0_0 = tmp_kernel_op_118*tmp_kernel_op_133 + tmp_kernel_op_40*tmp_kernel_op_59 + tmp_kernel_op_81*tmp_kernel_op_96;
+             const real_t elMat_0_1 = -tmp_kernel_op_133*tmp_kernel_op_136 - tmp_kernel_op_134*tmp_kernel_op_59 - tmp_kernel_op_135*tmp_kernel_op_96;
+             const real_t elMat_0_2 = -tmp_kernel_op_133*tmp_kernel_op_139 - tmp_kernel_op_137*tmp_kernel_op_59 - tmp_kernel_op_138*tmp_kernel_op_96;
+             const real_t elMat_0_3 = tmp_kernel_op_133*tmp_kernel_op_148 + tmp_kernel_op_142*tmp_kernel_op_59 + tmp_kernel_op_145*tmp_kernel_op_96;
+             const real_t elMat_0_4 = tmp_kernel_op_133*tmp_kernel_op_151 + tmp_kernel_op_149*tmp_kernel_op_59 + tmp_kernel_op_150*tmp_kernel_op_96;
+             const real_t elMat_0_5 = tmp_kernel_op_133*tmp_kernel_op_154 + tmp_kernel_op_152*tmp_kernel_op_59 + tmp_kernel_op_153*tmp_kernel_op_96;
+             const real_t elMat_1_0 = tmp_kernel_op_118*tmp_kernel_op_157 + tmp_kernel_op_155*tmp_kernel_op_40 + tmp_kernel_op_156*tmp_kernel_op_81;
+             const real_t elMat_1_1 = -tmp_kernel_op_134*tmp_kernel_op_155 - tmp_kernel_op_135*tmp_kernel_op_156 - tmp_kernel_op_136*tmp_kernel_op_157;
+             const real_t elMat_1_2 = -tmp_kernel_op_137*tmp_kernel_op_155 - tmp_kernel_op_138*tmp_kernel_op_156 - tmp_kernel_op_139*tmp_kernel_op_157;
+             const real_t elMat_1_3 = tmp_kernel_op_142*tmp_kernel_op_155 + tmp_kernel_op_145*tmp_kernel_op_156 + tmp_kernel_op_148*tmp_kernel_op_157;
+             const real_t elMat_1_4 = tmp_kernel_op_149*tmp_kernel_op_155 + tmp_kernel_op_150*tmp_kernel_op_156 + tmp_kernel_op_151*tmp_kernel_op_157;
+             const real_t elMat_1_5 = tmp_kernel_op_152*tmp_kernel_op_155 + tmp_kernel_op_153*tmp_kernel_op_156 + tmp_kernel_op_154*tmp_kernel_op_157;
+             const real_t elMat_2_0 = tmp_kernel_op_118*tmp_kernel_op_160 + tmp_kernel_op_158*tmp_kernel_op_40 + tmp_kernel_op_159*tmp_kernel_op_81;
+             const real_t elMat_2_1 = -tmp_kernel_op_134*tmp_kernel_op_158 - tmp_kernel_op_135*tmp_kernel_op_159 - tmp_kernel_op_136*tmp_kernel_op_160;
+             const real_t elMat_2_2 = -tmp_kernel_op_137*tmp_kernel_op_158 - tmp_kernel_op_138*tmp_kernel_op_159 - tmp_kernel_op_139*tmp_kernel_op_160;
+             const real_t elMat_2_3 = tmp_kernel_op_142*tmp_kernel_op_158 + tmp_kernel_op_145*tmp_kernel_op_159 + tmp_kernel_op_148*tmp_kernel_op_160;
+             const real_t elMat_2_4 = tmp_kernel_op_149*tmp_kernel_op_158 + tmp_kernel_op_150*tmp_kernel_op_159 + tmp_kernel_op_151*tmp_kernel_op_160;
+             const real_t elMat_2_5 = tmp_kernel_op_152*tmp_kernel_op_158 + tmp_kernel_op_153*tmp_kernel_op_159 + tmp_kernel_op_154*tmp_kernel_op_160;
+         
+             std::vector< uint_t > _data_rowIdx( 3 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 18 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..358763d39859dc351ad4683e1b47207aef8bea9e
--- /dev/null
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp
@@ -0,0 +1,484 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ToP1ElementwiseDivergenceAnnulusMap_0_1.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_kernel_op_0 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_kernel_op_12 = rayVertex_0 - refVertex_0;
+       const real_t tmp_kernel_op_13 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_kernel_op_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_kernel_op_15 = radRayVertex - radRefVertex;
+       const real_t tmp_kernel_op_16 = tmp_kernel_op_15*1.0 / (tmp_kernel_op_0*tmp_kernel_op_12 - tmp_kernel_op_13*tmp_kernel_op_14);
+       const real_t tmp_kernel_op_17 = tmp_kernel_op_16*1.0;
+       const real_t tmp_kernel_op_28 = -tmp_kernel_op_0;
+       const real_t tmp_kernel_op_38 = -tmp_kernel_op_13;
+       const real_t tmp_kernel_op_39 = -tmp_kernel_op_15*1.0 / (-tmp_kernel_op_12*tmp_kernel_op_28 + tmp_kernel_op_14*tmp_kernel_op_38);
+       const real_t tmp_kernel_op_40 = tmp_kernel_op_39*1.0;
+       const real_t tmp_kernel_op_44 = -rayVertex_1;
+       const real_t tmp_kernel_op_45 = -rayVertex_0;
+       const real_t tmp_kernel_op_55 = 0.66666666666666663;
+       const real_t tmp_kernel_op_89 = 2.6666666666666665;
+       const real_t tmp_kernel_op_123 = 0.66666666666666663;
+       const real_t tmp_kernel_op_126 = 2.6666666666666665;
+       const real_t tmp_kernel_op_129 = 0.66666666666666663;
+       const real_t tmp_kernel_op_132 = 0.66666666666666663;
+       const real_t tmp_kernel_op_149 = tmp_kernel_op_126 + tmp_kernel_op_55 - 3.0;
+       const real_t tmp_kernel_op_151 = tmp_kernel_op_129 + tmp_kernel_op_89 - 3.0;
+       const real_t tmp_kernel_op_153 = tmp_kernel_op_123 + tmp_kernel_op_132 - 3.0;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t tmp_kernel_op_1 = -p_affine_0_0;
+             const real_t tmp_kernel_op_2 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_kernel_op_3 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_kernel_op_4 = tmp_kernel_op_1 + tmp_kernel_op_2*0.16666666666666666 + tmp_kernel_op_3*0.66666666666666663;
+             const real_t tmp_kernel_op_5 = (tmp_kernel_op_4*tmp_kernel_op_4);
+             const real_t tmp_kernel_op_6 = -p_affine_0_1;
+             const real_t tmp_kernel_op_7 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_kernel_op_8 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_kernel_op_9 = tmp_kernel_op_6 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+             const real_t tmp_kernel_op_10 = (tmp_kernel_op_9*tmp_kernel_op_9);
+             const real_t tmp_kernel_op_11 = tmp_kernel_op_10 + tmp_kernel_op_5;
+             const real_t tmp_kernel_op_18 = pow(tmp_kernel_op_11, -0.50000000000000000)*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_19 = tmp_kernel_op_18*tmp_kernel_op_4;
+             const real_t tmp_kernel_op_20 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_4) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_9);
+             const real_t tmp_kernel_op_21 = pow(tmp_kernel_op_11, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_22 = tmp_kernel_op_21*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_20);
+             const real_t tmp_kernel_op_23 = tmp_kernel_op_18*tmp_kernel_op_9;
+             const real_t tmp_kernel_op_24 = tmp_kernel_op_21*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_20);
+             const real_t tmp_kernel_op_25 = tmp_kernel_op_4*tmp_kernel_op_9;
+             const real_t tmp_kernel_op_26 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_19 - tmp_kernel_op_10*tmp_kernel_op_22)*(tmp_kernel_op_13*tmp_kernel_op_23 + tmp_kernel_op_24*tmp_kernel_op_5) - (tmp_kernel_op_0*tmp_kernel_op_23 + tmp_kernel_op_22*tmp_kernel_op_25)*(tmp_kernel_op_13*tmp_kernel_op_19 - tmp_kernel_op_24*tmp_kernel_op_25));
+             const real_t tmp_kernel_op_27 = tmp_kernel_op_26*0.16666666666666674;
+             const real_t tmp_kernel_op_29 = -tmp_kernel_op_2;
+             const real_t tmp_kernel_op_30 = -tmp_kernel_op_3;
+             const real_t tmp_kernel_op_31 = p_affine_0_0 + tmp_kernel_op_29*0.16666666666666666 + tmp_kernel_op_30*0.66666666666666663;
+             const real_t tmp_kernel_op_32 = (tmp_kernel_op_31*tmp_kernel_op_31);
+             const real_t tmp_kernel_op_33 = -tmp_kernel_op_7;
+             const real_t tmp_kernel_op_34 = -tmp_kernel_op_8;
+             const real_t tmp_kernel_op_35 = p_affine_0_1 + tmp_kernel_op_33*0.16666666666666666 + tmp_kernel_op_34*0.66666666666666663;
+             const real_t tmp_kernel_op_36 = (tmp_kernel_op_35*tmp_kernel_op_35);
+             const real_t tmp_kernel_op_37 = tmp_kernel_op_32 + tmp_kernel_op_36;
+             const real_t tmp_kernel_op_41 = pow(tmp_kernel_op_37, -0.50000000000000000)*tmp_kernel_op_40;
+             const real_t tmp_kernel_op_42 = tmp_kernel_op_31*tmp_kernel_op_41;
+             const real_t tmp_kernel_op_43 = pow(tmp_kernel_op_37, -1.5000000000000000);
+             const real_t tmp_kernel_op_46 = radRayVertex + tmp_kernel_op_39*(tmp_kernel_op_28*(tmp_kernel_op_31 + tmp_kernel_op_45) - tmp_kernel_op_38*(tmp_kernel_op_35 + tmp_kernel_op_44));
+             const real_t tmp_kernel_op_47 = tmp_kernel_op_43*tmp_kernel_op_46*1.0;
+             const real_t tmp_kernel_op_48 = tmp_kernel_op_28*tmp_kernel_op_42 + tmp_kernel_op_36*tmp_kernel_op_47;
+             const real_t tmp_kernel_op_49 = tmp_kernel_op_35*tmp_kernel_op_41;
+             const real_t tmp_kernel_op_50 = tmp_kernel_op_31*tmp_kernel_op_35*tmp_kernel_op_43*tmp_kernel_op_46*1.0 + tmp_kernel_op_38*tmp_kernel_op_42;
+             const real_t tmp_kernel_op_51 = 1.0 / (tmp_kernel_op_48*(tmp_kernel_op_32*tmp_kernel_op_43*tmp_kernel_op_46*1.0 - tmp_kernel_op_38*tmp_kernel_op_49) + tmp_kernel_op_50*(tmp_kernel_op_28*tmp_kernel_op_49 - tmp_kernel_op_31*tmp_kernel_op_35*tmp_kernel_op_47));
+             const real_t tmp_kernel_op_52 = tmp_kernel_op_48*tmp_kernel_op_51;
+             const real_t tmp_kernel_op_53 = tmp_kernel_op_50*tmp_kernel_op_51;
+             const real_t tmp_kernel_op_54 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_53 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_52;
+             const real_t tmp_kernel_op_56 = tmp_kernel_op_54*(tmp_kernel_op_55 - 1.0);
+             const real_t tmp_kernel_op_57 = tmp_kernel_op_1 + tmp_kernel_op_2*0.66666666666666663 + tmp_kernel_op_3*0.16666666666666666;
+             const real_t tmp_kernel_op_58 = (tmp_kernel_op_57*tmp_kernel_op_57);
+             const real_t tmp_kernel_op_59 = tmp_kernel_op_6 + tmp_kernel_op_7*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_60 = (tmp_kernel_op_59*tmp_kernel_op_59);
+             const real_t tmp_kernel_op_61 = tmp_kernel_op_58 + tmp_kernel_op_60;
+             const real_t tmp_kernel_op_62 = tmp_kernel_op_17*pow(tmp_kernel_op_61, -0.50000000000000000);
+             const real_t tmp_kernel_op_63 = tmp_kernel_op_57*tmp_kernel_op_62;
+             const real_t tmp_kernel_op_64 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_57) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_59);
+             const real_t tmp_kernel_op_65 = pow(tmp_kernel_op_61, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_66 = tmp_kernel_op_65*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_64);
+             const real_t tmp_kernel_op_67 = tmp_kernel_op_59*tmp_kernel_op_62;
+             const real_t tmp_kernel_op_68 = tmp_kernel_op_65*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_64);
+             const real_t tmp_kernel_op_69 = tmp_kernel_op_57*tmp_kernel_op_59;
+             const real_t tmp_kernel_op_70 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_63 - tmp_kernel_op_60*tmp_kernel_op_66)*(tmp_kernel_op_13*tmp_kernel_op_67 + tmp_kernel_op_58*tmp_kernel_op_68) - (tmp_kernel_op_0*tmp_kernel_op_67 + tmp_kernel_op_66*tmp_kernel_op_69)*(tmp_kernel_op_13*tmp_kernel_op_63 - tmp_kernel_op_68*tmp_kernel_op_69));
+             const real_t tmp_kernel_op_71 = tmp_kernel_op_70*0.16666666666666671;
+             const real_t tmp_kernel_op_72 = p_affine_0_0 + tmp_kernel_op_29*0.66666666666666663 + tmp_kernel_op_30*0.16666666666666666;
+             const real_t tmp_kernel_op_73 = (tmp_kernel_op_72*tmp_kernel_op_72);
+             const real_t tmp_kernel_op_74 = p_affine_0_1 + tmp_kernel_op_33*0.66666666666666663 + tmp_kernel_op_34*0.16666666666666666;
+             const real_t tmp_kernel_op_75 = (tmp_kernel_op_74*tmp_kernel_op_74);
+             const real_t tmp_kernel_op_76 = tmp_kernel_op_73 + tmp_kernel_op_75;
+             const real_t tmp_kernel_op_77 = tmp_kernel_op_40*pow(tmp_kernel_op_76, -0.50000000000000000);
+             const real_t tmp_kernel_op_78 = tmp_kernel_op_72*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_79 = pow(tmp_kernel_op_76, -1.5000000000000000);
+             const real_t tmp_kernel_op_80 = radRayVertex + tmp_kernel_op_39*(tmp_kernel_op_28*(tmp_kernel_op_45 + tmp_kernel_op_72) - tmp_kernel_op_38*(tmp_kernel_op_44 + tmp_kernel_op_74));
+             const real_t tmp_kernel_op_81 = tmp_kernel_op_79*tmp_kernel_op_80*1.0;
+             const real_t tmp_kernel_op_82 = tmp_kernel_op_28*tmp_kernel_op_78 + tmp_kernel_op_75*tmp_kernel_op_81;
+             const real_t tmp_kernel_op_83 = tmp_kernel_op_74*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_84 = tmp_kernel_op_38*tmp_kernel_op_78 + tmp_kernel_op_72*tmp_kernel_op_74*tmp_kernel_op_79*tmp_kernel_op_80*1.0;
+             const real_t tmp_kernel_op_85 = 1.0 / (tmp_kernel_op_82*(-tmp_kernel_op_38*tmp_kernel_op_83 + tmp_kernel_op_73*tmp_kernel_op_79*tmp_kernel_op_80*1.0) + tmp_kernel_op_84*(tmp_kernel_op_28*tmp_kernel_op_83 - tmp_kernel_op_72*tmp_kernel_op_74*tmp_kernel_op_81));
+             const real_t tmp_kernel_op_86 = tmp_kernel_op_82*tmp_kernel_op_85;
+             const real_t tmp_kernel_op_87 = tmp_kernel_op_84*tmp_kernel_op_85;
+             const real_t tmp_kernel_op_88 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_87 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_86;
+             const real_t tmp_kernel_op_90 = tmp_kernel_op_88*(tmp_kernel_op_89 - 1.0);
+             const real_t tmp_kernel_op_91 = tmp_kernel_op_1 + tmp_kernel_op_2*0.16666666666666666 + tmp_kernel_op_3*0.16666666666666666;
+             const real_t tmp_kernel_op_92 = (tmp_kernel_op_91*tmp_kernel_op_91);
+             const real_t tmp_kernel_op_93 = tmp_kernel_op_6 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_94 = (tmp_kernel_op_93*tmp_kernel_op_93);
+             const real_t tmp_kernel_op_95 = tmp_kernel_op_92 + tmp_kernel_op_94;
+             const real_t tmp_kernel_op_96 = tmp_kernel_op_17*pow(tmp_kernel_op_95, -0.50000000000000000);
+             const real_t tmp_kernel_op_97 = tmp_kernel_op_91*tmp_kernel_op_96;
+             const real_t tmp_kernel_op_98 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_91) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_93);
+             const real_t tmp_kernel_op_99 = pow(tmp_kernel_op_95, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_100 = tmp_kernel_op_99*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_98);
+             const real_t tmp_kernel_op_101 = tmp_kernel_op_93*tmp_kernel_op_96;
+             const real_t tmp_kernel_op_102 = tmp_kernel_op_99*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_98);
+             const real_t tmp_kernel_op_103 = tmp_kernel_op_91*tmp_kernel_op_93;
+             const real_t tmp_kernel_op_104 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_101 + tmp_kernel_op_100*tmp_kernel_op_103)*(tmp_kernel_op_102*tmp_kernel_op_103 - tmp_kernel_op_13*tmp_kernel_op_97) + (tmp_kernel_op_0*tmp_kernel_op_97 - tmp_kernel_op_100*tmp_kernel_op_94)*(tmp_kernel_op_101*tmp_kernel_op_13 + tmp_kernel_op_102*tmp_kernel_op_92));
+             const real_t tmp_kernel_op_105 = tmp_kernel_op_104*0.66666666666666674;
+             const real_t tmp_kernel_op_106 = p_affine_0_0 + tmp_kernel_op_29*0.16666666666666666 + tmp_kernel_op_30*0.16666666666666666;
+             const real_t tmp_kernel_op_107 = (tmp_kernel_op_106*tmp_kernel_op_106);
+             const real_t tmp_kernel_op_108 = p_affine_0_1 + tmp_kernel_op_33*0.16666666666666666 + tmp_kernel_op_34*0.16666666666666666;
+             const real_t tmp_kernel_op_109 = (tmp_kernel_op_108*tmp_kernel_op_108);
+             const real_t tmp_kernel_op_110 = tmp_kernel_op_107 + tmp_kernel_op_109;
+             const real_t tmp_kernel_op_111 = pow(tmp_kernel_op_110, -0.50000000000000000)*tmp_kernel_op_40;
+             const real_t tmp_kernel_op_112 = tmp_kernel_op_106*tmp_kernel_op_111;
+             const real_t tmp_kernel_op_113 = pow(tmp_kernel_op_110, -1.5000000000000000);
+             const real_t tmp_kernel_op_114 = radRayVertex + tmp_kernel_op_39*(tmp_kernel_op_28*(tmp_kernel_op_106 + tmp_kernel_op_45) - tmp_kernel_op_38*(tmp_kernel_op_108 + tmp_kernel_op_44));
+             const real_t tmp_kernel_op_115 = tmp_kernel_op_113*tmp_kernel_op_114*1.0;
+             const real_t tmp_kernel_op_116 = tmp_kernel_op_109*tmp_kernel_op_115 + tmp_kernel_op_112*tmp_kernel_op_28;
+             const real_t tmp_kernel_op_117 = tmp_kernel_op_108*tmp_kernel_op_111;
+             const real_t tmp_kernel_op_118 = tmp_kernel_op_106*tmp_kernel_op_108*tmp_kernel_op_113*tmp_kernel_op_114*1.0 + tmp_kernel_op_112*tmp_kernel_op_38;
+             const real_t tmp_kernel_op_119 = 1.0 / (tmp_kernel_op_116*(tmp_kernel_op_107*tmp_kernel_op_113*tmp_kernel_op_114*1.0 - tmp_kernel_op_117*tmp_kernel_op_38) + tmp_kernel_op_118*(-tmp_kernel_op_106*tmp_kernel_op_108*tmp_kernel_op_115 + tmp_kernel_op_117*tmp_kernel_op_28));
+             const real_t tmp_kernel_op_120 = tmp_kernel_op_116*tmp_kernel_op_119;
+             const real_t tmp_kernel_op_121 = tmp_kernel_op_118*tmp_kernel_op_119;
+             const real_t tmp_kernel_op_122 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_121 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_120;
+             const real_t tmp_kernel_op_124 = tmp_kernel_op_122*(tmp_kernel_op_123 - 1.0);
+             const real_t tmp_kernel_op_125 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_53 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_52;
+             const real_t tmp_kernel_op_127 = tmp_kernel_op_125*(tmp_kernel_op_126 - 1.0);
+             const real_t tmp_kernel_op_128 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_87 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_86;
+             const real_t tmp_kernel_op_130 = tmp_kernel_op_128*(tmp_kernel_op_129 - 1.0);
+             const real_t tmp_kernel_op_131 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_121 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_120;
+             const real_t tmp_kernel_op_133 = tmp_kernel_op_131*(tmp_kernel_op_132 - 1.0);
+             const real_t tmp_kernel_op_134 = tmp_kernel_op_125*tmp_kernel_op_55;
+             const real_t tmp_kernel_op_135 = tmp_kernel_op_126*tmp_kernel_op_54;
+             const real_t tmp_kernel_op_136 = -tmp_kernel_op_134 - tmp_kernel_op_135;
+             const real_t tmp_kernel_op_137 = tmp_kernel_op_128*tmp_kernel_op_89;
+             const real_t tmp_kernel_op_138 = tmp_kernel_op_129*tmp_kernel_op_88;
+             const real_t tmp_kernel_op_139 = -tmp_kernel_op_137 - tmp_kernel_op_138;
+             const real_t tmp_kernel_op_140 = tmp_kernel_op_123*tmp_kernel_op_131;
+             const real_t tmp_kernel_op_141 = tmp_kernel_op_122*tmp_kernel_op_132;
+             const real_t tmp_kernel_op_142 = -tmp_kernel_op_140 - tmp_kernel_op_141;
+             const real_t tmp_kernel_op_143 = -tmp_kernel_op_125*(-tmp_kernel_op_55 - 1.333333333333333) + tmp_kernel_op_135;
+             const real_t tmp_kernel_op_144 = -tmp_kernel_op_128*(-tmp_kernel_op_89 + 2.666666666666667) + tmp_kernel_op_138;
+             const real_t tmp_kernel_op_145 = -tmp_kernel_op_131*(-tmp_kernel_op_123 + 2.666666666666667) + tmp_kernel_op_141;
+             const real_t tmp_kernel_op_146 = tmp_kernel_op_134 - tmp_kernel_op_54*(-tmp_kernel_op_126 + 2.666666666666667);
+             const real_t tmp_kernel_op_147 = tmp_kernel_op_137 - tmp_kernel_op_88*(-tmp_kernel_op_129 - 1.333333333333333);
+             const real_t tmp_kernel_op_148 = -tmp_kernel_op_122*(-tmp_kernel_op_132 + 2.666666666666667) + tmp_kernel_op_140;
+             const real_t tmp_kernel_op_150 = -tmp_kernel_op_125*tmp_kernel_op_149 - tmp_kernel_op_149*tmp_kernel_op_54;
+             const real_t tmp_kernel_op_152 = -tmp_kernel_op_128*tmp_kernel_op_151 - tmp_kernel_op_151*tmp_kernel_op_88;
+             const real_t tmp_kernel_op_154 = -tmp_kernel_op_122*tmp_kernel_op_153 - tmp_kernel_op_131*tmp_kernel_op_153;
+             const real_t tmp_kernel_op_155 = tmp_kernel_op_26*0.16666666666666666;
+             const real_t tmp_kernel_op_156 = tmp_kernel_op_70*0.66666666666666663;
+             const real_t tmp_kernel_op_157 = tmp_kernel_op_104*0.16666666666666666;
+             const real_t tmp_kernel_op_158 = tmp_kernel_op_26*0.66666666666666663;
+             const real_t tmp_kernel_op_159 = tmp_kernel_op_70*0.16666666666666666;
+             const real_t tmp_kernel_op_160 = tmp_kernel_op_104*0.16666666666666666;
+             const real_t elMatVec_0 = src_dof_0*(tmp_kernel_op_105*tmp_kernel_op_154 + tmp_kernel_op_150*tmp_kernel_op_27 + tmp_kernel_op_152*tmp_kernel_op_71) + src_dof_1*(-tmp_kernel_op_105*tmp_kernel_op_124 - tmp_kernel_op_27*tmp_kernel_op_56 - tmp_kernel_op_71*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_105*tmp_kernel_op_133 - tmp_kernel_op_127*tmp_kernel_op_27 - tmp_kernel_op_130*tmp_kernel_op_71) + src_dof_3*(tmp_kernel_op_105*tmp_kernel_op_142 + tmp_kernel_op_136*tmp_kernel_op_27 + tmp_kernel_op_139*tmp_kernel_op_71) + src_dof_4*(tmp_kernel_op_105*tmp_kernel_op_145 + tmp_kernel_op_143*tmp_kernel_op_27 + tmp_kernel_op_144*tmp_kernel_op_71) + src_dof_5*(tmp_kernel_op_105*tmp_kernel_op_148 + tmp_kernel_op_146*tmp_kernel_op_27 + tmp_kernel_op_147*tmp_kernel_op_71);
+             const real_t elMatVec_1 = src_dof_0*(tmp_kernel_op_150*tmp_kernel_op_155 + tmp_kernel_op_152*tmp_kernel_op_156 + tmp_kernel_op_154*tmp_kernel_op_157) + src_dof_1*(-tmp_kernel_op_124*tmp_kernel_op_157 - tmp_kernel_op_155*tmp_kernel_op_56 - tmp_kernel_op_156*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_127*tmp_kernel_op_155 - tmp_kernel_op_130*tmp_kernel_op_156 - tmp_kernel_op_133*tmp_kernel_op_157) + src_dof_3*(tmp_kernel_op_136*tmp_kernel_op_155 + tmp_kernel_op_139*tmp_kernel_op_156 + tmp_kernel_op_142*tmp_kernel_op_157) + src_dof_4*(tmp_kernel_op_143*tmp_kernel_op_155 + tmp_kernel_op_144*tmp_kernel_op_156 + tmp_kernel_op_145*tmp_kernel_op_157) + src_dof_5*(tmp_kernel_op_146*tmp_kernel_op_155 + tmp_kernel_op_147*tmp_kernel_op_156 + tmp_kernel_op_148*tmp_kernel_op_157);
+             const real_t elMatVec_2 = src_dof_0*(tmp_kernel_op_150*tmp_kernel_op_158 + tmp_kernel_op_152*tmp_kernel_op_159 + tmp_kernel_op_154*tmp_kernel_op_160) + src_dof_1*(-tmp_kernel_op_124*tmp_kernel_op_160 - tmp_kernel_op_158*tmp_kernel_op_56 - tmp_kernel_op_159*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_127*tmp_kernel_op_158 - tmp_kernel_op_130*tmp_kernel_op_159 - tmp_kernel_op_133*tmp_kernel_op_160) + src_dof_3*(tmp_kernel_op_136*tmp_kernel_op_158 + tmp_kernel_op_139*tmp_kernel_op_159 + tmp_kernel_op_142*tmp_kernel_op_160) + src_dof_4*(tmp_kernel_op_143*tmp_kernel_op_158 + tmp_kernel_op_144*tmp_kernel_op_159 + tmp_kernel_op_145*tmp_kernel_op_160) + src_dof_5*(tmp_kernel_op_146*tmp_kernel_op_158 + tmp_kernel_op_147*tmp_kernel_op_159 + tmp_kernel_op_148*tmp_kernel_op_160);
+             _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t tmp_kernel_op_1 = -p_affine_0_0;
+             const real_t tmp_kernel_op_2 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_kernel_op_3 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_kernel_op_4 = tmp_kernel_op_1 + tmp_kernel_op_2*0.16666666666666666 + tmp_kernel_op_3*0.66666666666666663;
+             const real_t tmp_kernel_op_5 = (tmp_kernel_op_4*tmp_kernel_op_4);
+             const real_t tmp_kernel_op_6 = -p_affine_0_1;
+             const real_t tmp_kernel_op_7 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_kernel_op_8 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_kernel_op_9 = tmp_kernel_op_6 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+             const real_t tmp_kernel_op_10 = (tmp_kernel_op_9*tmp_kernel_op_9);
+             const real_t tmp_kernel_op_11 = tmp_kernel_op_10 + tmp_kernel_op_5;
+             const real_t tmp_kernel_op_18 = pow(tmp_kernel_op_11, -0.50000000000000000)*tmp_kernel_op_17;
+             const real_t tmp_kernel_op_19 = tmp_kernel_op_18*tmp_kernel_op_4;
+             const real_t tmp_kernel_op_20 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_4) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_9);
+             const real_t tmp_kernel_op_21 = pow(tmp_kernel_op_11, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_22 = tmp_kernel_op_21*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_20);
+             const real_t tmp_kernel_op_23 = tmp_kernel_op_18*tmp_kernel_op_9;
+             const real_t tmp_kernel_op_24 = tmp_kernel_op_21*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_20);
+             const real_t tmp_kernel_op_25 = tmp_kernel_op_4*tmp_kernel_op_9;
+             const real_t tmp_kernel_op_26 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_19 - tmp_kernel_op_10*tmp_kernel_op_22)*(tmp_kernel_op_13*tmp_kernel_op_23 + tmp_kernel_op_24*tmp_kernel_op_5) - (tmp_kernel_op_0*tmp_kernel_op_23 + tmp_kernel_op_22*tmp_kernel_op_25)*(tmp_kernel_op_13*tmp_kernel_op_19 - tmp_kernel_op_24*tmp_kernel_op_25));
+             const real_t tmp_kernel_op_27 = tmp_kernel_op_26*0.16666666666666674;
+             const real_t tmp_kernel_op_29 = -tmp_kernel_op_2;
+             const real_t tmp_kernel_op_30 = -tmp_kernel_op_3;
+             const real_t tmp_kernel_op_31 = p_affine_0_0 + tmp_kernel_op_29*0.16666666666666666 + tmp_kernel_op_30*0.66666666666666663;
+             const real_t tmp_kernel_op_32 = (tmp_kernel_op_31*tmp_kernel_op_31);
+             const real_t tmp_kernel_op_33 = -tmp_kernel_op_7;
+             const real_t tmp_kernel_op_34 = -tmp_kernel_op_8;
+             const real_t tmp_kernel_op_35 = p_affine_0_1 + tmp_kernel_op_33*0.16666666666666666 + tmp_kernel_op_34*0.66666666666666663;
+             const real_t tmp_kernel_op_36 = (tmp_kernel_op_35*tmp_kernel_op_35);
+             const real_t tmp_kernel_op_37 = tmp_kernel_op_32 + tmp_kernel_op_36;
+             const real_t tmp_kernel_op_41 = pow(tmp_kernel_op_37, -0.50000000000000000)*tmp_kernel_op_40;
+             const real_t tmp_kernel_op_42 = tmp_kernel_op_31*tmp_kernel_op_41;
+             const real_t tmp_kernel_op_43 = pow(tmp_kernel_op_37, -1.5000000000000000);
+             const real_t tmp_kernel_op_46 = radRayVertex + tmp_kernel_op_39*(tmp_kernel_op_28*(tmp_kernel_op_31 + tmp_kernel_op_45) - tmp_kernel_op_38*(tmp_kernel_op_35 + tmp_kernel_op_44));
+             const real_t tmp_kernel_op_47 = tmp_kernel_op_43*tmp_kernel_op_46*1.0;
+             const real_t tmp_kernel_op_48 = tmp_kernel_op_28*tmp_kernel_op_42 + tmp_kernel_op_36*tmp_kernel_op_47;
+             const real_t tmp_kernel_op_49 = tmp_kernel_op_35*tmp_kernel_op_41;
+             const real_t tmp_kernel_op_50 = tmp_kernel_op_31*tmp_kernel_op_35*tmp_kernel_op_43*tmp_kernel_op_46*1.0 + tmp_kernel_op_38*tmp_kernel_op_42;
+             const real_t tmp_kernel_op_51 = 1.0 / (tmp_kernel_op_48*(tmp_kernel_op_32*tmp_kernel_op_43*tmp_kernel_op_46*1.0 - tmp_kernel_op_38*tmp_kernel_op_49) + tmp_kernel_op_50*(tmp_kernel_op_28*tmp_kernel_op_49 - tmp_kernel_op_31*tmp_kernel_op_35*tmp_kernel_op_47));
+             const real_t tmp_kernel_op_52 = tmp_kernel_op_48*tmp_kernel_op_51;
+             const real_t tmp_kernel_op_53 = tmp_kernel_op_50*tmp_kernel_op_51;
+             const real_t tmp_kernel_op_54 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_53 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_52;
+             const real_t tmp_kernel_op_56 = tmp_kernel_op_54*(tmp_kernel_op_55 - 1.0);
+             const real_t tmp_kernel_op_57 = tmp_kernel_op_1 + tmp_kernel_op_2*0.66666666666666663 + tmp_kernel_op_3*0.16666666666666666;
+             const real_t tmp_kernel_op_58 = (tmp_kernel_op_57*tmp_kernel_op_57);
+             const real_t tmp_kernel_op_59 = tmp_kernel_op_6 + tmp_kernel_op_7*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_60 = (tmp_kernel_op_59*tmp_kernel_op_59);
+             const real_t tmp_kernel_op_61 = tmp_kernel_op_58 + tmp_kernel_op_60;
+             const real_t tmp_kernel_op_62 = tmp_kernel_op_17*pow(tmp_kernel_op_61, -0.50000000000000000);
+             const real_t tmp_kernel_op_63 = tmp_kernel_op_57*tmp_kernel_op_62;
+             const real_t tmp_kernel_op_64 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_57) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_59);
+             const real_t tmp_kernel_op_65 = pow(tmp_kernel_op_61, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_66 = tmp_kernel_op_65*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_64);
+             const real_t tmp_kernel_op_67 = tmp_kernel_op_59*tmp_kernel_op_62;
+             const real_t tmp_kernel_op_68 = tmp_kernel_op_65*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_64);
+             const real_t tmp_kernel_op_69 = tmp_kernel_op_57*tmp_kernel_op_59;
+             const real_t tmp_kernel_op_70 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_63 - tmp_kernel_op_60*tmp_kernel_op_66)*(tmp_kernel_op_13*tmp_kernel_op_67 + tmp_kernel_op_58*tmp_kernel_op_68) - (tmp_kernel_op_0*tmp_kernel_op_67 + tmp_kernel_op_66*tmp_kernel_op_69)*(tmp_kernel_op_13*tmp_kernel_op_63 - tmp_kernel_op_68*tmp_kernel_op_69));
+             const real_t tmp_kernel_op_71 = tmp_kernel_op_70*0.16666666666666671;
+             const real_t tmp_kernel_op_72 = p_affine_0_0 + tmp_kernel_op_29*0.66666666666666663 + tmp_kernel_op_30*0.16666666666666666;
+             const real_t tmp_kernel_op_73 = (tmp_kernel_op_72*tmp_kernel_op_72);
+             const real_t tmp_kernel_op_74 = p_affine_0_1 + tmp_kernel_op_33*0.66666666666666663 + tmp_kernel_op_34*0.16666666666666666;
+             const real_t tmp_kernel_op_75 = (tmp_kernel_op_74*tmp_kernel_op_74);
+             const real_t tmp_kernel_op_76 = tmp_kernel_op_73 + tmp_kernel_op_75;
+             const real_t tmp_kernel_op_77 = tmp_kernel_op_40*pow(tmp_kernel_op_76, -0.50000000000000000);
+             const real_t tmp_kernel_op_78 = tmp_kernel_op_72*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_79 = pow(tmp_kernel_op_76, -1.5000000000000000);
+             const real_t tmp_kernel_op_80 = radRayVertex + tmp_kernel_op_39*(tmp_kernel_op_28*(tmp_kernel_op_45 + tmp_kernel_op_72) - tmp_kernel_op_38*(tmp_kernel_op_44 + tmp_kernel_op_74));
+             const real_t tmp_kernel_op_81 = tmp_kernel_op_79*tmp_kernel_op_80*1.0;
+             const real_t tmp_kernel_op_82 = tmp_kernel_op_28*tmp_kernel_op_78 + tmp_kernel_op_75*tmp_kernel_op_81;
+             const real_t tmp_kernel_op_83 = tmp_kernel_op_74*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_84 = tmp_kernel_op_38*tmp_kernel_op_78 + tmp_kernel_op_72*tmp_kernel_op_74*tmp_kernel_op_79*tmp_kernel_op_80*1.0;
+             const real_t tmp_kernel_op_85 = 1.0 / (tmp_kernel_op_82*(-tmp_kernel_op_38*tmp_kernel_op_83 + tmp_kernel_op_73*tmp_kernel_op_79*tmp_kernel_op_80*1.0) + tmp_kernel_op_84*(tmp_kernel_op_28*tmp_kernel_op_83 - tmp_kernel_op_72*tmp_kernel_op_74*tmp_kernel_op_81));
+             const real_t tmp_kernel_op_86 = tmp_kernel_op_82*tmp_kernel_op_85;
+             const real_t tmp_kernel_op_87 = tmp_kernel_op_84*tmp_kernel_op_85;
+             const real_t tmp_kernel_op_88 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_87 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_86;
+             const real_t tmp_kernel_op_90 = tmp_kernel_op_88*(tmp_kernel_op_89 - 1.0);
+             const real_t tmp_kernel_op_91 = tmp_kernel_op_1 + tmp_kernel_op_2*0.16666666666666666 + tmp_kernel_op_3*0.16666666666666666;
+             const real_t tmp_kernel_op_92 = (tmp_kernel_op_91*tmp_kernel_op_91);
+             const real_t tmp_kernel_op_93 = tmp_kernel_op_6 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_94 = (tmp_kernel_op_93*tmp_kernel_op_93);
+             const real_t tmp_kernel_op_95 = tmp_kernel_op_92 + tmp_kernel_op_94;
+             const real_t tmp_kernel_op_96 = tmp_kernel_op_17*pow(tmp_kernel_op_95, -0.50000000000000000);
+             const real_t tmp_kernel_op_97 = tmp_kernel_op_91*tmp_kernel_op_96;
+             const real_t tmp_kernel_op_98 = -tmp_kernel_op_0*(rayVertex_0 + tmp_kernel_op_91) + tmp_kernel_op_13*(rayVertex_1 + tmp_kernel_op_93);
+             const real_t tmp_kernel_op_99 = pow(tmp_kernel_op_95, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_100 = tmp_kernel_op_99*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_98);
+             const real_t tmp_kernel_op_101 = tmp_kernel_op_93*tmp_kernel_op_96;
+             const real_t tmp_kernel_op_102 = tmp_kernel_op_99*(radRayVertex + tmp_kernel_op_16*tmp_kernel_op_98);
+             const real_t tmp_kernel_op_103 = tmp_kernel_op_91*tmp_kernel_op_93;
+             const real_t tmp_kernel_op_104 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_0*tmp_kernel_op_101 + tmp_kernel_op_100*tmp_kernel_op_103)*(tmp_kernel_op_102*tmp_kernel_op_103 - tmp_kernel_op_13*tmp_kernel_op_97) + (tmp_kernel_op_0*tmp_kernel_op_97 - tmp_kernel_op_100*tmp_kernel_op_94)*(tmp_kernel_op_101*tmp_kernel_op_13 + tmp_kernel_op_102*tmp_kernel_op_92));
+             const real_t tmp_kernel_op_105 = tmp_kernel_op_104*0.66666666666666674;
+             const real_t tmp_kernel_op_106 = p_affine_0_0 + tmp_kernel_op_29*0.16666666666666666 + tmp_kernel_op_30*0.16666666666666666;
+             const real_t tmp_kernel_op_107 = (tmp_kernel_op_106*tmp_kernel_op_106);
+             const real_t tmp_kernel_op_108 = p_affine_0_1 + tmp_kernel_op_33*0.16666666666666666 + tmp_kernel_op_34*0.16666666666666666;
+             const real_t tmp_kernel_op_109 = (tmp_kernel_op_108*tmp_kernel_op_108);
+             const real_t tmp_kernel_op_110 = tmp_kernel_op_107 + tmp_kernel_op_109;
+             const real_t tmp_kernel_op_111 = pow(tmp_kernel_op_110, -0.50000000000000000)*tmp_kernel_op_40;
+             const real_t tmp_kernel_op_112 = tmp_kernel_op_106*tmp_kernel_op_111;
+             const real_t tmp_kernel_op_113 = pow(tmp_kernel_op_110, -1.5000000000000000);
+             const real_t tmp_kernel_op_114 = radRayVertex + tmp_kernel_op_39*(tmp_kernel_op_28*(tmp_kernel_op_106 + tmp_kernel_op_45) - tmp_kernel_op_38*(tmp_kernel_op_108 + tmp_kernel_op_44));
+             const real_t tmp_kernel_op_115 = tmp_kernel_op_113*tmp_kernel_op_114*1.0;
+             const real_t tmp_kernel_op_116 = tmp_kernel_op_109*tmp_kernel_op_115 + tmp_kernel_op_112*tmp_kernel_op_28;
+             const real_t tmp_kernel_op_117 = tmp_kernel_op_108*tmp_kernel_op_111;
+             const real_t tmp_kernel_op_118 = tmp_kernel_op_106*tmp_kernel_op_108*tmp_kernel_op_113*tmp_kernel_op_114*1.0 + tmp_kernel_op_112*tmp_kernel_op_38;
+             const real_t tmp_kernel_op_119 = 1.0 / (tmp_kernel_op_116*(tmp_kernel_op_107*tmp_kernel_op_113*tmp_kernel_op_114*1.0 - tmp_kernel_op_117*tmp_kernel_op_38) + tmp_kernel_op_118*(-tmp_kernel_op_106*tmp_kernel_op_108*tmp_kernel_op_115 + tmp_kernel_op_117*tmp_kernel_op_28));
+             const real_t tmp_kernel_op_120 = tmp_kernel_op_116*tmp_kernel_op_119;
+             const real_t tmp_kernel_op_121 = tmp_kernel_op_118*tmp_kernel_op_119;
+             const real_t tmp_kernel_op_122 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_121 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_120;
+             const real_t tmp_kernel_op_124 = tmp_kernel_op_122*(tmp_kernel_op_123 - 1.0);
+             const real_t tmp_kernel_op_125 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_53 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_52;
+             const real_t tmp_kernel_op_127 = tmp_kernel_op_125*(tmp_kernel_op_126 - 1.0);
+             const real_t tmp_kernel_op_128 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_87 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_86;
+             const real_t tmp_kernel_op_130 = tmp_kernel_op_128*(tmp_kernel_op_129 - 1.0);
+             const real_t tmp_kernel_op_131 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_121 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_120;
+             const real_t tmp_kernel_op_133 = tmp_kernel_op_131*(tmp_kernel_op_132 - 1.0);
+             const real_t tmp_kernel_op_134 = tmp_kernel_op_125*tmp_kernel_op_55;
+             const real_t tmp_kernel_op_135 = tmp_kernel_op_126*tmp_kernel_op_54;
+             const real_t tmp_kernel_op_136 = -tmp_kernel_op_134 - tmp_kernel_op_135;
+             const real_t tmp_kernel_op_137 = tmp_kernel_op_128*tmp_kernel_op_89;
+             const real_t tmp_kernel_op_138 = tmp_kernel_op_129*tmp_kernel_op_88;
+             const real_t tmp_kernel_op_139 = -tmp_kernel_op_137 - tmp_kernel_op_138;
+             const real_t tmp_kernel_op_140 = tmp_kernel_op_123*tmp_kernel_op_131;
+             const real_t tmp_kernel_op_141 = tmp_kernel_op_122*tmp_kernel_op_132;
+             const real_t tmp_kernel_op_142 = -tmp_kernel_op_140 - tmp_kernel_op_141;
+             const real_t tmp_kernel_op_143 = -tmp_kernel_op_125*(-tmp_kernel_op_55 - 1.333333333333333) + tmp_kernel_op_135;
+             const real_t tmp_kernel_op_144 = -tmp_kernel_op_128*(-tmp_kernel_op_89 + 2.666666666666667) + tmp_kernel_op_138;
+             const real_t tmp_kernel_op_145 = -tmp_kernel_op_131*(-tmp_kernel_op_123 + 2.666666666666667) + tmp_kernel_op_141;
+             const real_t tmp_kernel_op_146 = tmp_kernel_op_134 - tmp_kernel_op_54*(-tmp_kernel_op_126 + 2.666666666666667);
+             const real_t tmp_kernel_op_147 = tmp_kernel_op_137 - tmp_kernel_op_88*(-tmp_kernel_op_129 - 1.333333333333333);
+             const real_t tmp_kernel_op_148 = -tmp_kernel_op_122*(-tmp_kernel_op_132 + 2.666666666666667) + tmp_kernel_op_140;
+             const real_t tmp_kernel_op_150 = -tmp_kernel_op_125*tmp_kernel_op_149 - tmp_kernel_op_149*tmp_kernel_op_54;
+             const real_t tmp_kernel_op_152 = -tmp_kernel_op_128*tmp_kernel_op_151 - tmp_kernel_op_151*tmp_kernel_op_88;
+             const real_t tmp_kernel_op_154 = -tmp_kernel_op_122*tmp_kernel_op_153 - tmp_kernel_op_131*tmp_kernel_op_153;
+             const real_t tmp_kernel_op_155 = tmp_kernel_op_26*0.16666666666666666;
+             const real_t tmp_kernel_op_156 = tmp_kernel_op_70*0.66666666666666663;
+             const real_t tmp_kernel_op_157 = tmp_kernel_op_104*0.16666666666666666;
+             const real_t tmp_kernel_op_158 = tmp_kernel_op_26*0.66666666666666663;
+             const real_t tmp_kernel_op_159 = tmp_kernel_op_70*0.16666666666666666;
+             const real_t tmp_kernel_op_160 = tmp_kernel_op_104*0.16666666666666666;
+             const real_t elMatVec_0 = src_dof_0*(tmp_kernel_op_105*tmp_kernel_op_154 + tmp_kernel_op_150*tmp_kernel_op_27 + tmp_kernel_op_152*tmp_kernel_op_71) + src_dof_1*(-tmp_kernel_op_105*tmp_kernel_op_124 - tmp_kernel_op_27*tmp_kernel_op_56 - tmp_kernel_op_71*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_105*tmp_kernel_op_133 - tmp_kernel_op_127*tmp_kernel_op_27 - tmp_kernel_op_130*tmp_kernel_op_71) + src_dof_3*(tmp_kernel_op_105*tmp_kernel_op_142 + tmp_kernel_op_136*tmp_kernel_op_27 + tmp_kernel_op_139*tmp_kernel_op_71) + src_dof_4*(tmp_kernel_op_105*tmp_kernel_op_145 + tmp_kernel_op_143*tmp_kernel_op_27 + tmp_kernel_op_144*tmp_kernel_op_71) + src_dof_5*(tmp_kernel_op_105*tmp_kernel_op_148 + tmp_kernel_op_146*tmp_kernel_op_27 + tmp_kernel_op_147*tmp_kernel_op_71);
+             const real_t elMatVec_1 = src_dof_0*(tmp_kernel_op_150*tmp_kernel_op_155 + tmp_kernel_op_152*tmp_kernel_op_156 + tmp_kernel_op_154*tmp_kernel_op_157) + src_dof_1*(-tmp_kernel_op_124*tmp_kernel_op_157 - tmp_kernel_op_155*tmp_kernel_op_56 - tmp_kernel_op_156*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_127*tmp_kernel_op_155 - tmp_kernel_op_130*tmp_kernel_op_156 - tmp_kernel_op_133*tmp_kernel_op_157) + src_dof_3*(tmp_kernel_op_136*tmp_kernel_op_155 + tmp_kernel_op_139*tmp_kernel_op_156 + tmp_kernel_op_142*tmp_kernel_op_157) + src_dof_4*(tmp_kernel_op_143*tmp_kernel_op_155 + tmp_kernel_op_144*tmp_kernel_op_156 + tmp_kernel_op_145*tmp_kernel_op_157) + src_dof_5*(tmp_kernel_op_146*tmp_kernel_op_155 + tmp_kernel_op_147*tmp_kernel_op_156 + tmp_kernel_op_148*tmp_kernel_op_157);
+             const real_t elMatVec_2 = src_dof_0*(tmp_kernel_op_150*tmp_kernel_op_158 + tmp_kernel_op_152*tmp_kernel_op_159 + tmp_kernel_op_154*tmp_kernel_op_160) + src_dof_1*(-tmp_kernel_op_124*tmp_kernel_op_160 - tmp_kernel_op_158*tmp_kernel_op_56 - tmp_kernel_op_159*tmp_kernel_op_90) + src_dof_2*(-tmp_kernel_op_127*tmp_kernel_op_158 - tmp_kernel_op_130*tmp_kernel_op_159 - tmp_kernel_op_133*tmp_kernel_op_160) + src_dof_3*(tmp_kernel_op_136*tmp_kernel_op_158 + tmp_kernel_op_139*tmp_kernel_op_159 + tmp_kernel_op_142*tmp_kernel_op_160) + src_dof_4*(tmp_kernel_op_143*tmp_kernel_op_158 + tmp_kernel_op_144*tmp_kernel_op_159 + tmp_kernel_op_145*tmp_kernel_op_160) + src_dof_5*(tmp_kernel_op_146*tmp_kernel_op_158 + tmp_kernel_op_147*tmp_kernel_op_159 + tmp_kernel_op_148*tmp_kernel_op_160);
+             _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6b2a859abd8a8b99d5c4a4c1ec6b6aa7f896de94
--- /dev/null
+++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_macro_2D.cpp
@@ -0,0 +1,576 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ToP1ElementwiseDivergenceAnnulusMap_0_1.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::toMatrix_macro_2D( idx_t * RESTRICT  _data_dst, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_kernel_op_0 = 0.66666666666666663;
+       const real_t tmp_kernel_op_1 = 2.6666666666666665;
+       const real_t tmp_kernel_op_2 = tmp_kernel_op_0 + tmp_kernel_op_1 - 3.0;
+       const real_t tmp_kernel_op_3 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_kernel_op_4 = -tmp_kernel_op_3;
+       const real_t tmp_kernel_op_18 = rayVertex_0 - refVertex_0;
+       const real_t tmp_kernel_op_19 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_kernel_op_20 = -tmp_kernel_op_19;
+       const real_t tmp_kernel_op_21 = rayVertex_1 - refVertex_1;
+       const real_t tmp_kernel_op_22 = radRayVertex - radRefVertex;
+       const real_t tmp_kernel_op_23 = -tmp_kernel_op_22*1.0 / (-tmp_kernel_op_18*tmp_kernel_op_4 + tmp_kernel_op_20*tmp_kernel_op_21);
+       const real_t tmp_kernel_op_24 = tmp_kernel_op_23*1.0;
+       const real_t tmp_kernel_op_28 = -rayVertex_1;
+       const real_t tmp_kernel_op_29 = -rayVertex_0;
+       const real_t tmp_kernel_op_48 = tmp_kernel_op_22*1.0 / (tmp_kernel_op_18*tmp_kernel_op_3 - tmp_kernel_op_19*tmp_kernel_op_21);
+       const real_t tmp_kernel_op_49 = tmp_kernel_op_48*1.0;
+       const real_t tmp_kernel_op_60 = 2.6666666666666665;
+       const real_t tmp_kernel_op_61 = 0.66666666666666663;
+       const real_t tmp_kernel_op_62 = tmp_kernel_op_60 + tmp_kernel_op_61 - 3.0;
+       const real_t tmp_kernel_op_97 = 0.66666666666666663;
+       const real_t tmp_kernel_op_98 = 0.66666666666666663;
+       const real_t tmp_kernel_op_99 = tmp_kernel_op_97 + tmp_kernel_op_98 - 3.0;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t tmp_kernel_op_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_kernel_op_6 = -tmp_kernel_op_5;
+             const real_t tmp_kernel_op_7 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_kernel_op_8 = -tmp_kernel_op_7;
+             const real_t tmp_kernel_op_9 = p_affine_0_0 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+             const real_t tmp_kernel_op_10 = (tmp_kernel_op_9*tmp_kernel_op_9);
+             const real_t tmp_kernel_op_11 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_kernel_op_12 = -tmp_kernel_op_11;
+             const real_t tmp_kernel_op_13 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_kernel_op_14 = -tmp_kernel_op_13;
+             const real_t tmp_kernel_op_15 = p_affine_0_1 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_14*0.66666666666666663;
+             const real_t tmp_kernel_op_16 = (tmp_kernel_op_15*tmp_kernel_op_15);
+             const real_t tmp_kernel_op_17 = tmp_kernel_op_10 + tmp_kernel_op_16;
+             const real_t tmp_kernel_op_25 = pow(tmp_kernel_op_17, -0.50000000000000000)*tmp_kernel_op_24;
+             const real_t tmp_kernel_op_26 = tmp_kernel_op_25*tmp_kernel_op_9;
+             const real_t tmp_kernel_op_27 = pow(tmp_kernel_op_17, -1.5000000000000000);
+             const real_t tmp_kernel_op_30 = radRayVertex + tmp_kernel_op_23*(-tmp_kernel_op_20*(tmp_kernel_op_15 + tmp_kernel_op_28) + tmp_kernel_op_4*(tmp_kernel_op_29 + tmp_kernel_op_9));
+             const real_t tmp_kernel_op_31 = tmp_kernel_op_27*tmp_kernel_op_30*1.0;
+             const real_t tmp_kernel_op_32 = tmp_kernel_op_16*tmp_kernel_op_31 + tmp_kernel_op_26*tmp_kernel_op_4;
+             const real_t tmp_kernel_op_33 = tmp_kernel_op_15*tmp_kernel_op_25;
+             const real_t tmp_kernel_op_34 = tmp_kernel_op_15*tmp_kernel_op_27*tmp_kernel_op_30*tmp_kernel_op_9*1.0 + tmp_kernel_op_20*tmp_kernel_op_26;
+             const real_t tmp_kernel_op_35 = 1.0 / (tmp_kernel_op_32*(tmp_kernel_op_10*tmp_kernel_op_27*tmp_kernel_op_30*1.0 - tmp_kernel_op_20*tmp_kernel_op_33) + tmp_kernel_op_34*(-tmp_kernel_op_15*tmp_kernel_op_31*tmp_kernel_op_9 + tmp_kernel_op_33*tmp_kernel_op_4));
+             const real_t tmp_kernel_op_36 = tmp_kernel_op_32*tmp_kernel_op_35;
+             const real_t tmp_kernel_op_37 = tmp_kernel_op_34*tmp_kernel_op_35;
+             const real_t tmp_kernel_op_38 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_37 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_36;
+             const real_t tmp_kernel_op_39 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_37 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_36;
+             const real_t tmp_kernel_op_40 = -tmp_kernel_op_2*tmp_kernel_op_38 - tmp_kernel_op_2*tmp_kernel_op_39;
+             const real_t tmp_kernel_op_41 = -p_affine_0_0;
+             const real_t tmp_kernel_op_42 = tmp_kernel_op_41 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.66666666666666663;
+             const real_t tmp_kernel_op_43 = (tmp_kernel_op_42*tmp_kernel_op_42);
+             const real_t tmp_kernel_op_44 = -p_affine_0_1;
+             const real_t tmp_kernel_op_45 = tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.66666666666666663 + tmp_kernel_op_44;
+             const real_t tmp_kernel_op_46 = (tmp_kernel_op_45*tmp_kernel_op_45);
+             const real_t tmp_kernel_op_47 = tmp_kernel_op_43 + tmp_kernel_op_46;
+             const real_t tmp_kernel_op_50 = pow(tmp_kernel_op_47, -0.50000000000000000)*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_51 = tmp_kernel_op_42*tmp_kernel_op_50;
+             const real_t tmp_kernel_op_52 = tmp_kernel_op_19*(rayVertex_1 + tmp_kernel_op_45) - tmp_kernel_op_3*(rayVertex_0 + tmp_kernel_op_42);
+             const real_t tmp_kernel_op_53 = pow(tmp_kernel_op_47, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_54 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+             const real_t tmp_kernel_op_55 = tmp_kernel_op_45*tmp_kernel_op_50;
+             const real_t tmp_kernel_op_56 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+             const real_t tmp_kernel_op_57 = tmp_kernel_op_42*tmp_kernel_op_45;
+             const real_t tmp_kernel_op_58 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_51 - tmp_kernel_op_56*tmp_kernel_op_57)*(tmp_kernel_op_3*tmp_kernel_op_55 + tmp_kernel_op_54*tmp_kernel_op_57) - (tmp_kernel_op_19*tmp_kernel_op_55 + tmp_kernel_op_43*tmp_kernel_op_56)*(tmp_kernel_op_3*tmp_kernel_op_51 - tmp_kernel_op_46*tmp_kernel_op_54));
+             const real_t tmp_kernel_op_59 = tmp_kernel_op_58*0.16666666666666674;
+             const real_t tmp_kernel_op_63 = p_affine_0_0 + tmp_kernel_op_6*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_64 = (tmp_kernel_op_63*tmp_kernel_op_63);
+             const real_t tmp_kernel_op_65 = p_affine_0_1 + tmp_kernel_op_12*0.66666666666666663 + tmp_kernel_op_14*0.16666666666666666;
+             const real_t tmp_kernel_op_66 = (tmp_kernel_op_65*tmp_kernel_op_65);
+             const real_t tmp_kernel_op_67 = tmp_kernel_op_64 + tmp_kernel_op_66;
+             const real_t tmp_kernel_op_68 = tmp_kernel_op_24*pow(tmp_kernel_op_67, -0.50000000000000000);
+             const real_t tmp_kernel_op_69 = tmp_kernel_op_63*tmp_kernel_op_68;
+             const real_t tmp_kernel_op_70 = pow(tmp_kernel_op_67, -1.5000000000000000);
+             const real_t tmp_kernel_op_71 = radRayVertex + tmp_kernel_op_23*(-tmp_kernel_op_20*(tmp_kernel_op_28 + tmp_kernel_op_65) + tmp_kernel_op_4*(tmp_kernel_op_29 + tmp_kernel_op_63));
+             const real_t tmp_kernel_op_72 = tmp_kernel_op_70*tmp_kernel_op_71*1.0;
+             const real_t tmp_kernel_op_73 = tmp_kernel_op_4*tmp_kernel_op_69 + tmp_kernel_op_66*tmp_kernel_op_72;
+             const real_t tmp_kernel_op_74 = tmp_kernel_op_65*tmp_kernel_op_68;
+             const real_t tmp_kernel_op_75 = tmp_kernel_op_20*tmp_kernel_op_69 + tmp_kernel_op_63*tmp_kernel_op_65*tmp_kernel_op_70*tmp_kernel_op_71*1.0;
+             const real_t tmp_kernel_op_76 = 1.0 / (tmp_kernel_op_73*(-tmp_kernel_op_20*tmp_kernel_op_74 + tmp_kernel_op_64*tmp_kernel_op_70*tmp_kernel_op_71*1.0) + tmp_kernel_op_75*(tmp_kernel_op_4*tmp_kernel_op_74 - tmp_kernel_op_63*tmp_kernel_op_65*tmp_kernel_op_72));
+             const real_t tmp_kernel_op_77 = tmp_kernel_op_73*tmp_kernel_op_76;
+             const real_t tmp_kernel_op_78 = tmp_kernel_op_75*tmp_kernel_op_76;
+             const real_t tmp_kernel_op_79 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_78 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_80 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_78 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_81 = -tmp_kernel_op_62*tmp_kernel_op_79 - tmp_kernel_op_62*tmp_kernel_op_80;
+             const real_t tmp_kernel_op_82 = tmp_kernel_op_41 + tmp_kernel_op_5*0.66666666666666663 + tmp_kernel_op_7*0.16666666666666666;
+             const real_t tmp_kernel_op_83 = (tmp_kernel_op_82*tmp_kernel_op_82);
+             const real_t tmp_kernel_op_84 = tmp_kernel_op_11*0.66666666666666663 + tmp_kernel_op_13*0.16666666666666666 + tmp_kernel_op_44;
+             const real_t tmp_kernel_op_85 = (tmp_kernel_op_84*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_86 = tmp_kernel_op_83 + tmp_kernel_op_85;
+             const real_t tmp_kernel_op_87 = tmp_kernel_op_49*pow(tmp_kernel_op_86, -0.50000000000000000);
+             const real_t tmp_kernel_op_88 = tmp_kernel_op_82*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_89 = tmp_kernel_op_19*(rayVertex_1 + tmp_kernel_op_84) - tmp_kernel_op_3*(rayVertex_0 + tmp_kernel_op_82);
+             const real_t tmp_kernel_op_90 = pow(tmp_kernel_op_86, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_91 = tmp_kernel_op_90*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_89);
+             const real_t tmp_kernel_op_92 = tmp_kernel_op_84*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_93 = tmp_kernel_op_90*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_89);
+             const real_t tmp_kernel_op_94 = tmp_kernel_op_82*tmp_kernel_op_84;
+             const real_t tmp_kernel_op_95 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_88 - tmp_kernel_op_93*tmp_kernel_op_94)*(tmp_kernel_op_3*tmp_kernel_op_92 + tmp_kernel_op_91*tmp_kernel_op_94) - (tmp_kernel_op_19*tmp_kernel_op_92 + tmp_kernel_op_83*tmp_kernel_op_93)*(tmp_kernel_op_3*tmp_kernel_op_88 - tmp_kernel_op_85*tmp_kernel_op_91));
+             const real_t tmp_kernel_op_96 = tmp_kernel_op_95*0.16666666666666671;
+             const real_t tmp_kernel_op_100 = p_affine_0_0 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_101 = (tmp_kernel_op_100*tmp_kernel_op_100);
+             const real_t tmp_kernel_op_102 = p_affine_0_1 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_14*0.16666666666666666;
+             const real_t tmp_kernel_op_103 = (tmp_kernel_op_102*tmp_kernel_op_102);
+             const real_t tmp_kernel_op_104 = tmp_kernel_op_101 + tmp_kernel_op_103;
+             const real_t tmp_kernel_op_105 = pow(tmp_kernel_op_104, -0.50000000000000000)*tmp_kernel_op_24;
+             const real_t tmp_kernel_op_106 = tmp_kernel_op_100*tmp_kernel_op_105;
+             const real_t tmp_kernel_op_107 = pow(tmp_kernel_op_104, -1.5000000000000000);
+             const real_t tmp_kernel_op_108 = radRayVertex + tmp_kernel_op_23*(-tmp_kernel_op_20*(tmp_kernel_op_102 + tmp_kernel_op_28) + tmp_kernel_op_4*(tmp_kernel_op_100 + tmp_kernel_op_29));
+             const real_t tmp_kernel_op_109 = tmp_kernel_op_107*tmp_kernel_op_108*1.0;
+             const real_t tmp_kernel_op_110 = tmp_kernel_op_103*tmp_kernel_op_109 + tmp_kernel_op_106*tmp_kernel_op_4;
+             const real_t tmp_kernel_op_111 = tmp_kernel_op_102*tmp_kernel_op_105;
+             const real_t tmp_kernel_op_112 = tmp_kernel_op_100*tmp_kernel_op_102*tmp_kernel_op_107*tmp_kernel_op_108*1.0 + tmp_kernel_op_106*tmp_kernel_op_20;
+             const real_t tmp_kernel_op_113 = 1.0 / (tmp_kernel_op_110*(tmp_kernel_op_101*tmp_kernel_op_107*tmp_kernel_op_108*1.0 - tmp_kernel_op_111*tmp_kernel_op_20) + tmp_kernel_op_112*(-tmp_kernel_op_100*tmp_kernel_op_102*tmp_kernel_op_109 + tmp_kernel_op_111*tmp_kernel_op_4));
+             const real_t tmp_kernel_op_114 = tmp_kernel_op_110*tmp_kernel_op_113;
+             const real_t tmp_kernel_op_115 = tmp_kernel_op_112*tmp_kernel_op_113;
+             const real_t tmp_kernel_op_116 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_115 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_114;
+             const real_t tmp_kernel_op_117 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_115 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_114;
+             const real_t tmp_kernel_op_118 = -tmp_kernel_op_116*tmp_kernel_op_99 - tmp_kernel_op_117*tmp_kernel_op_99;
+             const real_t tmp_kernel_op_119 = tmp_kernel_op_41 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.16666666666666666;
+             const real_t tmp_kernel_op_120 = (tmp_kernel_op_119*tmp_kernel_op_119);
+             const real_t tmp_kernel_op_121 = tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.16666666666666666 + tmp_kernel_op_44;
+             const real_t tmp_kernel_op_122 = (tmp_kernel_op_121*tmp_kernel_op_121);
+             const real_t tmp_kernel_op_123 = tmp_kernel_op_120 + tmp_kernel_op_122;
+             const real_t tmp_kernel_op_124 = pow(tmp_kernel_op_123, -0.50000000000000000)*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_125 = tmp_kernel_op_119*tmp_kernel_op_124;
+             const real_t tmp_kernel_op_126 = tmp_kernel_op_19*(rayVertex_1 + tmp_kernel_op_121) - tmp_kernel_op_3*(rayVertex_0 + tmp_kernel_op_119);
+             const real_t tmp_kernel_op_127 = pow(tmp_kernel_op_123, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_128 = tmp_kernel_op_127*(radRayVertex + tmp_kernel_op_126*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_129 = tmp_kernel_op_121*tmp_kernel_op_124;
+             const real_t tmp_kernel_op_130 = tmp_kernel_op_127*(radRayVertex + tmp_kernel_op_126*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_131 = tmp_kernel_op_119*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_132 = abs_det_jac_affine_GRAY*0.16666666666666666*abs(-(tmp_kernel_op_120*tmp_kernel_op_130 + tmp_kernel_op_129*tmp_kernel_op_19)*(-tmp_kernel_op_122*tmp_kernel_op_128 + tmp_kernel_op_125*tmp_kernel_op_3) + (tmp_kernel_op_125*tmp_kernel_op_19 - tmp_kernel_op_130*tmp_kernel_op_131)*(tmp_kernel_op_128*tmp_kernel_op_131 + tmp_kernel_op_129*tmp_kernel_op_3));
+             const real_t tmp_kernel_op_133 = tmp_kernel_op_132*0.66666666666666674;
+             const real_t tmp_kernel_op_134 = tmp_kernel_op_38*(tmp_kernel_op_0 - 1.0);
+             const real_t tmp_kernel_op_135 = tmp_kernel_op_79*(tmp_kernel_op_60 - 1.0);
+             const real_t tmp_kernel_op_136 = tmp_kernel_op_116*(tmp_kernel_op_97 - 1.0);
+             const real_t tmp_kernel_op_137 = tmp_kernel_op_39*(tmp_kernel_op_1 - 1.0);
+             const real_t tmp_kernel_op_138 = tmp_kernel_op_80*(tmp_kernel_op_61 - 1.0);
+             const real_t tmp_kernel_op_139 = tmp_kernel_op_117*(tmp_kernel_op_98 - 1.0);
+             const real_t tmp_kernel_op_140 = tmp_kernel_op_0*tmp_kernel_op_39;
+             const real_t tmp_kernel_op_141 = tmp_kernel_op_1*tmp_kernel_op_38;
+             const real_t tmp_kernel_op_142 = -tmp_kernel_op_140 - tmp_kernel_op_141;
+             const real_t tmp_kernel_op_143 = tmp_kernel_op_60*tmp_kernel_op_80;
+             const real_t tmp_kernel_op_144 = tmp_kernel_op_61*tmp_kernel_op_79;
+             const real_t tmp_kernel_op_145 = -tmp_kernel_op_143 - tmp_kernel_op_144;
+             const real_t tmp_kernel_op_146 = tmp_kernel_op_117*tmp_kernel_op_97;
+             const real_t tmp_kernel_op_147 = tmp_kernel_op_116*tmp_kernel_op_98;
+             const real_t tmp_kernel_op_148 = -tmp_kernel_op_146 - tmp_kernel_op_147;
+             const real_t tmp_kernel_op_149 = tmp_kernel_op_141 - tmp_kernel_op_39*(-tmp_kernel_op_0 - 1.333333333333333);
+             const real_t tmp_kernel_op_150 = tmp_kernel_op_144 - tmp_kernel_op_80*(-tmp_kernel_op_60 + 2.666666666666667);
+             const real_t tmp_kernel_op_151 = -tmp_kernel_op_117*(-tmp_kernel_op_97 + 2.666666666666667) + tmp_kernel_op_147;
+             const real_t tmp_kernel_op_152 = tmp_kernel_op_140 - tmp_kernel_op_38*(-tmp_kernel_op_1 + 2.666666666666667);
+             const real_t tmp_kernel_op_153 = tmp_kernel_op_143 - tmp_kernel_op_79*(-tmp_kernel_op_61 - 1.333333333333333);
+             const real_t tmp_kernel_op_154 = -tmp_kernel_op_116*(-tmp_kernel_op_98 + 2.666666666666667) + tmp_kernel_op_146;
+             const real_t tmp_kernel_op_155 = tmp_kernel_op_58*0.16666666666666666;
+             const real_t tmp_kernel_op_156 = tmp_kernel_op_95*0.66666666666666663;
+             const real_t tmp_kernel_op_157 = tmp_kernel_op_132*0.16666666666666666;
+             const real_t tmp_kernel_op_158 = tmp_kernel_op_58*0.66666666666666663;
+             const real_t tmp_kernel_op_159 = tmp_kernel_op_95*0.16666666666666666;
+             const real_t tmp_kernel_op_160 = tmp_kernel_op_132*0.16666666666666666;
+             const real_t elMat_0_0 = tmp_kernel_op_118*tmp_kernel_op_133 + tmp_kernel_op_40*tmp_kernel_op_59 + tmp_kernel_op_81*tmp_kernel_op_96;
+             const real_t elMat_0_1 = -tmp_kernel_op_133*tmp_kernel_op_136 - tmp_kernel_op_134*tmp_kernel_op_59 - tmp_kernel_op_135*tmp_kernel_op_96;
+             const real_t elMat_0_2 = -tmp_kernel_op_133*tmp_kernel_op_139 - tmp_kernel_op_137*tmp_kernel_op_59 - tmp_kernel_op_138*tmp_kernel_op_96;
+             const real_t elMat_0_3 = tmp_kernel_op_133*tmp_kernel_op_148 + tmp_kernel_op_142*tmp_kernel_op_59 + tmp_kernel_op_145*tmp_kernel_op_96;
+             const real_t elMat_0_4 = tmp_kernel_op_133*tmp_kernel_op_151 + tmp_kernel_op_149*tmp_kernel_op_59 + tmp_kernel_op_150*tmp_kernel_op_96;
+             const real_t elMat_0_5 = tmp_kernel_op_133*tmp_kernel_op_154 + tmp_kernel_op_152*tmp_kernel_op_59 + tmp_kernel_op_153*tmp_kernel_op_96;
+             const real_t elMat_1_0 = tmp_kernel_op_118*tmp_kernel_op_157 + tmp_kernel_op_155*tmp_kernel_op_40 + tmp_kernel_op_156*tmp_kernel_op_81;
+             const real_t elMat_1_1 = -tmp_kernel_op_134*tmp_kernel_op_155 - tmp_kernel_op_135*tmp_kernel_op_156 - tmp_kernel_op_136*tmp_kernel_op_157;
+             const real_t elMat_1_2 = -tmp_kernel_op_137*tmp_kernel_op_155 - tmp_kernel_op_138*tmp_kernel_op_156 - tmp_kernel_op_139*tmp_kernel_op_157;
+             const real_t elMat_1_3 = tmp_kernel_op_142*tmp_kernel_op_155 + tmp_kernel_op_145*tmp_kernel_op_156 + tmp_kernel_op_148*tmp_kernel_op_157;
+             const real_t elMat_1_4 = tmp_kernel_op_149*tmp_kernel_op_155 + tmp_kernel_op_150*tmp_kernel_op_156 + tmp_kernel_op_151*tmp_kernel_op_157;
+             const real_t elMat_1_5 = tmp_kernel_op_152*tmp_kernel_op_155 + tmp_kernel_op_153*tmp_kernel_op_156 + tmp_kernel_op_154*tmp_kernel_op_157;
+             const real_t elMat_2_0 = tmp_kernel_op_118*tmp_kernel_op_160 + tmp_kernel_op_158*tmp_kernel_op_40 + tmp_kernel_op_159*tmp_kernel_op_81;
+             const real_t elMat_2_1 = -tmp_kernel_op_134*tmp_kernel_op_158 - tmp_kernel_op_135*tmp_kernel_op_159 - tmp_kernel_op_136*tmp_kernel_op_160;
+             const real_t elMat_2_2 = -tmp_kernel_op_137*tmp_kernel_op_158 - tmp_kernel_op_138*tmp_kernel_op_159 - tmp_kernel_op_139*tmp_kernel_op_160;
+             const real_t elMat_2_3 = tmp_kernel_op_142*tmp_kernel_op_158 + tmp_kernel_op_145*tmp_kernel_op_159 + tmp_kernel_op_148*tmp_kernel_op_160;
+             const real_t elMat_2_4 = tmp_kernel_op_149*tmp_kernel_op_158 + tmp_kernel_op_150*tmp_kernel_op_159 + tmp_kernel_op_151*tmp_kernel_op_160;
+             const real_t elMat_2_5 = tmp_kernel_op_152*tmp_kernel_op_158 + tmp_kernel_op_153*tmp_kernel_op_159 + tmp_kernel_op_154*tmp_kernel_op_160;
+         
+             std::vector< uint_t > _data_rowIdx( 3 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 18 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t tmp_kernel_op_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_kernel_op_6 = -tmp_kernel_op_5;
+             const real_t tmp_kernel_op_7 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_kernel_op_8 = -tmp_kernel_op_7;
+             const real_t tmp_kernel_op_9 = p_affine_0_0 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+             const real_t tmp_kernel_op_10 = (tmp_kernel_op_9*tmp_kernel_op_9);
+             const real_t tmp_kernel_op_11 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_kernel_op_12 = -tmp_kernel_op_11;
+             const real_t tmp_kernel_op_13 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_kernel_op_14 = -tmp_kernel_op_13;
+             const real_t tmp_kernel_op_15 = p_affine_0_1 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_14*0.66666666666666663;
+             const real_t tmp_kernel_op_16 = (tmp_kernel_op_15*tmp_kernel_op_15);
+             const real_t tmp_kernel_op_17 = tmp_kernel_op_10 + tmp_kernel_op_16;
+             const real_t tmp_kernel_op_25 = pow(tmp_kernel_op_17, -0.50000000000000000)*tmp_kernel_op_24;
+             const real_t tmp_kernel_op_26 = tmp_kernel_op_25*tmp_kernel_op_9;
+             const real_t tmp_kernel_op_27 = pow(tmp_kernel_op_17, -1.5000000000000000);
+             const real_t tmp_kernel_op_30 = radRayVertex + tmp_kernel_op_23*(-tmp_kernel_op_20*(tmp_kernel_op_15 + tmp_kernel_op_28) + tmp_kernel_op_4*(tmp_kernel_op_29 + tmp_kernel_op_9));
+             const real_t tmp_kernel_op_31 = tmp_kernel_op_27*tmp_kernel_op_30*1.0;
+             const real_t tmp_kernel_op_32 = tmp_kernel_op_16*tmp_kernel_op_31 + tmp_kernel_op_26*tmp_kernel_op_4;
+             const real_t tmp_kernel_op_33 = tmp_kernel_op_15*tmp_kernel_op_25;
+             const real_t tmp_kernel_op_34 = tmp_kernel_op_15*tmp_kernel_op_27*tmp_kernel_op_30*tmp_kernel_op_9*1.0 + tmp_kernel_op_20*tmp_kernel_op_26;
+             const real_t tmp_kernel_op_35 = 1.0 / (tmp_kernel_op_32*(tmp_kernel_op_10*tmp_kernel_op_27*tmp_kernel_op_30*1.0 - tmp_kernel_op_20*tmp_kernel_op_33) + tmp_kernel_op_34*(-tmp_kernel_op_15*tmp_kernel_op_31*tmp_kernel_op_9 + tmp_kernel_op_33*tmp_kernel_op_4));
+             const real_t tmp_kernel_op_36 = tmp_kernel_op_32*tmp_kernel_op_35;
+             const real_t tmp_kernel_op_37 = tmp_kernel_op_34*tmp_kernel_op_35;
+             const real_t tmp_kernel_op_38 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_37 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_36;
+             const real_t tmp_kernel_op_39 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_37 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_36;
+             const real_t tmp_kernel_op_40 = -tmp_kernel_op_2*tmp_kernel_op_38 - tmp_kernel_op_2*tmp_kernel_op_39;
+             const real_t tmp_kernel_op_41 = -p_affine_0_0;
+             const real_t tmp_kernel_op_42 = tmp_kernel_op_41 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.66666666666666663;
+             const real_t tmp_kernel_op_43 = (tmp_kernel_op_42*tmp_kernel_op_42);
+             const real_t tmp_kernel_op_44 = -p_affine_0_1;
+             const real_t tmp_kernel_op_45 = tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.66666666666666663 + tmp_kernel_op_44;
+             const real_t tmp_kernel_op_46 = (tmp_kernel_op_45*tmp_kernel_op_45);
+             const real_t tmp_kernel_op_47 = tmp_kernel_op_43 + tmp_kernel_op_46;
+             const real_t tmp_kernel_op_50 = pow(tmp_kernel_op_47, -0.50000000000000000)*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_51 = tmp_kernel_op_42*tmp_kernel_op_50;
+             const real_t tmp_kernel_op_52 = tmp_kernel_op_19*(rayVertex_1 + tmp_kernel_op_45) - tmp_kernel_op_3*(rayVertex_0 + tmp_kernel_op_42);
+             const real_t tmp_kernel_op_53 = pow(tmp_kernel_op_47, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_54 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+             const real_t tmp_kernel_op_55 = tmp_kernel_op_45*tmp_kernel_op_50;
+             const real_t tmp_kernel_op_56 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+             const real_t tmp_kernel_op_57 = tmp_kernel_op_42*tmp_kernel_op_45;
+             const real_t tmp_kernel_op_58 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_51 - tmp_kernel_op_56*tmp_kernel_op_57)*(tmp_kernel_op_3*tmp_kernel_op_55 + tmp_kernel_op_54*tmp_kernel_op_57) - (tmp_kernel_op_19*tmp_kernel_op_55 + tmp_kernel_op_43*tmp_kernel_op_56)*(tmp_kernel_op_3*tmp_kernel_op_51 - tmp_kernel_op_46*tmp_kernel_op_54));
+             const real_t tmp_kernel_op_59 = tmp_kernel_op_58*0.16666666666666674;
+             const real_t tmp_kernel_op_63 = p_affine_0_0 + tmp_kernel_op_6*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_64 = (tmp_kernel_op_63*tmp_kernel_op_63);
+             const real_t tmp_kernel_op_65 = p_affine_0_1 + tmp_kernel_op_12*0.66666666666666663 + tmp_kernel_op_14*0.16666666666666666;
+             const real_t tmp_kernel_op_66 = (tmp_kernel_op_65*tmp_kernel_op_65);
+             const real_t tmp_kernel_op_67 = tmp_kernel_op_64 + tmp_kernel_op_66;
+             const real_t tmp_kernel_op_68 = tmp_kernel_op_24*pow(tmp_kernel_op_67, -0.50000000000000000);
+             const real_t tmp_kernel_op_69 = tmp_kernel_op_63*tmp_kernel_op_68;
+             const real_t tmp_kernel_op_70 = pow(tmp_kernel_op_67, -1.5000000000000000);
+             const real_t tmp_kernel_op_71 = radRayVertex + tmp_kernel_op_23*(-tmp_kernel_op_20*(tmp_kernel_op_28 + tmp_kernel_op_65) + tmp_kernel_op_4*(tmp_kernel_op_29 + tmp_kernel_op_63));
+             const real_t tmp_kernel_op_72 = tmp_kernel_op_70*tmp_kernel_op_71*1.0;
+             const real_t tmp_kernel_op_73 = tmp_kernel_op_4*tmp_kernel_op_69 + tmp_kernel_op_66*tmp_kernel_op_72;
+             const real_t tmp_kernel_op_74 = tmp_kernel_op_65*tmp_kernel_op_68;
+             const real_t tmp_kernel_op_75 = tmp_kernel_op_20*tmp_kernel_op_69 + tmp_kernel_op_63*tmp_kernel_op_65*tmp_kernel_op_70*tmp_kernel_op_71*1.0;
+             const real_t tmp_kernel_op_76 = 1.0 / (tmp_kernel_op_73*(-tmp_kernel_op_20*tmp_kernel_op_74 + tmp_kernel_op_64*tmp_kernel_op_70*tmp_kernel_op_71*1.0) + tmp_kernel_op_75*(tmp_kernel_op_4*tmp_kernel_op_74 - tmp_kernel_op_63*tmp_kernel_op_65*tmp_kernel_op_72));
+             const real_t tmp_kernel_op_77 = tmp_kernel_op_73*tmp_kernel_op_76;
+             const real_t tmp_kernel_op_78 = tmp_kernel_op_75*tmp_kernel_op_76;
+             const real_t tmp_kernel_op_79 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_78 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_80 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_78 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_81 = -tmp_kernel_op_62*tmp_kernel_op_79 - tmp_kernel_op_62*tmp_kernel_op_80;
+             const real_t tmp_kernel_op_82 = tmp_kernel_op_41 + tmp_kernel_op_5*0.66666666666666663 + tmp_kernel_op_7*0.16666666666666666;
+             const real_t tmp_kernel_op_83 = (tmp_kernel_op_82*tmp_kernel_op_82);
+             const real_t tmp_kernel_op_84 = tmp_kernel_op_11*0.66666666666666663 + tmp_kernel_op_13*0.16666666666666666 + tmp_kernel_op_44;
+             const real_t tmp_kernel_op_85 = (tmp_kernel_op_84*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_86 = tmp_kernel_op_83 + tmp_kernel_op_85;
+             const real_t tmp_kernel_op_87 = tmp_kernel_op_49*pow(tmp_kernel_op_86, -0.50000000000000000);
+             const real_t tmp_kernel_op_88 = tmp_kernel_op_82*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_89 = tmp_kernel_op_19*(rayVertex_1 + tmp_kernel_op_84) - tmp_kernel_op_3*(rayVertex_0 + tmp_kernel_op_82);
+             const real_t tmp_kernel_op_90 = pow(tmp_kernel_op_86, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_91 = tmp_kernel_op_90*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_89);
+             const real_t tmp_kernel_op_92 = tmp_kernel_op_84*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_93 = tmp_kernel_op_90*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_89);
+             const real_t tmp_kernel_op_94 = tmp_kernel_op_82*tmp_kernel_op_84;
+             const real_t tmp_kernel_op_95 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_88 - tmp_kernel_op_93*tmp_kernel_op_94)*(tmp_kernel_op_3*tmp_kernel_op_92 + tmp_kernel_op_91*tmp_kernel_op_94) - (tmp_kernel_op_19*tmp_kernel_op_92 + tmp_kernel_op_83*tmp_kernel_op_93)*(tmp_kernel_op_3*tmp_kernel_op_88 - tmp_kernel_op_85*tmp_kernel_op_91));
+             const real_t tmp_kernel_op_96 = tmp_kernel_op_95*0.16666666666666671;
+             const real_t tmp_kernel_op_100 = p_affine_0_0 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_101 = (tmp_kernel_op_100*tmp_kernel_op_100);
+             const real_t tmp_kernel_op_102 = p_affine_0_1 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_14*0.16666666666666666;
+             const real_t tmp_kernel_op_103 = (tmp_kernel_op_102*tmp_kernel_op_102);
+             const real_t tmp_kernel_op_104 = tmp_kernel_op_101 + tmp_kernel_op_103;
+             const real_t tmp_kernel_op_105 = pow(tmp_kernel_op_104, -0.50000000000000000)*tmp_kernel_op_24;
+             const real_t tmp_kernel_op_106 = tmp_kernel_op_100*tmp_kernel_op_105;
+             const real_t tmp_kernel_op_107 = pow(tmp_kernel_op_104, -1.5000000000000000);
+             const real_t tmp_kernel_op_108 = radRayVertex + tmp_kernel_op_23*(-tmp_kernel_op_20*(tmp_kernel_op_102 + tmp_kernel_op_28) + tmp_kernel_op_4*(tmp_kernel_op_100 + tmp_kernel_op_29));
+             const real_t tmp_kernel_op_109 = tmp_kernel_op_107*tmp_kernel_op_108*1.0;
+             const real_t tmp_kernel_op_110 = tmp_kernel_op_103*tmp_kernel_op_109 + tmp_kernel_op_106*tmp_kernel_op_4;
+             const real_t tmp_kernel_op_111 = tmp_kernel_op_102*tmp_kernel_op_105;
+             const real_t tmp_kernel_op_112 = tmp_kernel_op_100*tmp_kernel_op_102*tmp_kernel_op_107*tmp_kernel_op_108*1.0 + tmp_kernel_op_106*tmp_kernel_op_20;
+             const real_t tmp_kernel_op_113 = 1.0 / (tmp_kernel_op_110*(tmp_kernel_op_101*tmp_kernel_op_107*tmp_kernel_op_108*1.0 - tmp_kernel_op_111*tmp_kernel_op_20) + tmp_kernel_op_112*(-tmp_kernel_op_100*tmp_kernel_op_102*tmp_kernel_op_109 + tmp_kernel_op_111*tmp_kernel_op_4));
+             const real_t tmp_kernel_op_114 = tmp_kernel_op_110*tmp_kernel_op_113;
+             const real_t tmp_kernel_op_115 = tmp_kernel_op_112*tmp_kernel_op_113;
+             const real_t tmp_kernel_op_116 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_115 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_114;
+             const real_t tmp_kernel_op_117 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_115 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_114;
+             const real_t tmp_kernel_op_118 = -tmp_kernel_op_116*tmp_kernel_op_99 - tmp_kernel_op_117*tmp_kernel_op_99;
+             const real_t tmp_kernel_op_119 = tmp_kernel_op_41 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.16666666666666666;
+             const real_t tmp_kernel_op_120 = (tmp_kernel_op_119*tmp_kernel_op_119);
+             const real_t tmp_kernel_op_121 = tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.16666666666666666 + tmp_kernel_op_44;
+             const real_t tmp_kernel_op_122 = (tmp_kernel_op_121*tmp_kernel_op_121);
+             const real_t tmp_kernel_op_123 = tmp_kernel_op_120 + tmp_kernel_op_122;
+             const real_t tmp_kernel_op_124 = pow(tmp_kernel_op_123, -0.50000000000000000)*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_125 = tmp_kernel_op_119*tmp_kernel_op_124;
+             const real_t tmp_kernel_op_126 = tmp_kernel_op_19*(rayVertex_1 + tmp_kernel_op_121) - tmp_kernel_op_3*(rayVertex_0 + tmp_kernel_op_119);
+             const real_t tmp_kernel_op_127 = pow(tmp_kernel_op_123, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_128 = tmp_kernel_op_127*(radRayVertex + tmp_kernel_op_126*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_129 = tmp_kernel_op_121*tmp_kernel_op_124;
+             const real_t tmp_kernel_op_130 = tmp_kernel_op_127*(radRayVertex + tmp_kernel_op_126*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_131 = tmp_kernel_op_119*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_132 = abs_det_jac_affine_BLUE*0.16666666666666666*abs(-(tmp_kernel_op_120*tmp_kernel_op_130 + tmp_kernel_op_129*tmp_kernel_op_19)*(-tmp_kernel_op_122*tmp_kernel_op_128 + tmp_kernel_op_125*tmp_kernel_op_3) + (tmp_kernel_op_125*tmp_kernel_op_19 - tmp_kernel_op_130*tmp_kernel_op_131)*(tmp_kernel_op_128*tmp_kernel_op_131 + tmp_kernel_op_129*tmp_kernel_op_3));
+             const real_t tmp_kernel_op_133 = tmp_kernel_op_132*0.66666666666666674;
+             const real_t tmp_kernel_op_134 = tmp_kernel_op_38*(tmp_kernel_op_0 - 1.0);
+             const real_t tmp_kernel_op_135 = tmp_kernel_op_79*(tmp_kernel_op_60 - 1.0);
+             const real_t tmp_kernel_op_136 = tmp_kernel_op_116*(tmp_kernel_op_97 - 1.0);
+             const real_t tmp_kernel_op_137 = tmp_kernel_op_39*(tmp_kernel_op_1 - 1.0);
+             const real_t tmp_kernel_op_138 = tmp_kernel_op_80*(tmp_kernel_op_61 - 1.0);
+             const real_t tmp_kernel_op_139 = tmp_kernel_op_117*(tmp_kernel_op_98 - 1.0);
+             const real_t tmp_kernel_op_140 = tmp_kernel_op_0*tmp_kernel_op_39;
+             const real_t tmp_kernel_op_141 = tmp_kernel_op_1*tmp_kernel_op_38;
+             const real_t tmp_kernel_op_142 = -tmp_kernel_op_140 - tmp_kernel_op_141;
+             const real_t tmp_kernel_op_143 = tmp_kernel_op_60*tmp_kernel_op_80;
+             const real_t tmp_kernel_op_144 = tmp_kernel_op_61*tmp_kernel_op_79;
+             const real_t tmp_kernel_op_145 = -tmp_kernel_op_143 - tmp_kernel_op_144;
+             const real_t tmp_kernel_op_146 = tmp_kernel_op_117*tmp_kernel_op_97;
+             const real_t tmp_kernel_op_147 = tmp_kernel_op_116*tmp_kernel_op_98;
+             const real_t tmp_kernel_op_148 = -tmp_kernel_op_146 - tmp_kernel_op_147;
+             const real_t tmp_kernel_op_149 = tmp_kernel_op_141 - tmp_kernel_op_39*(-tmp_kernel_op_0 - 1.333333333333333);
+             const real_t tmp_kernel_op_150 = tmp_kernel_op_144 - tmp_kernel_op_80*(-tmp_kernel_op_60 + 2.666666666666667);
+             const real_t tmp_kernel_op_151 = -tmp_kernel_op_117*(-tmp_kernel_op_97 + 2.666666666666667) + tmp_kernel_op_147;
+             const real_t tmp_kernel_op_152 = tmp_kernel_op_140 - tmp_kernel_op_38*(-tmp_kernel_op_1 + 2.666666666666667);
+             const real_t tmp_kernel_op_153 = tmp_kernel_op_143 - tmp_kernel_op_79*(-tmp_kernel_op_61 - 1.333333333333333);
+             const real_t tmp_kernel_op_154 = -tmp_kernel_op_116*(-tmp_kernel_op_98 + 2.666666666666667) + tmp_kernel_op_146;
+             const real_t tmp_kernel_op_155 = tmp_kernel_op_58*0.16666666666666666;
+             const real_t tmp_kernel_op_156 = tmp_kernel_op_95*0.66666666666666663;
+             const real_t tmp_kernel_op_157 = tmp_kernel_op_132*0.16666666666666666;
+             const real_t tmp_kernel_op_158 = tmp_kernel_op_58*0.66666666666666663;
+             const real_t tmp_kernel_op_159 = tmp_kernel_op_95*0.16666666666666666;
+             const real_t tmp_kernel_op_160 = tmp_kernel_op_132*0.16666666666666666;
+             const real_t elMat_0_0 = tmp_kernel_op_118*tmp_kernel_op_133 + tmp_kernel_op_40*tmp_kernel_op_59 + tmp_kernel_op_81*tmp_kernel_op_96;
+             const real_t elMat_0_1 = -tmp_kernel_op_133*tmp_kernel_op_136 - tmp_kernel_op_134*tmp_kernel_op_59 - tmp_kernel_op_135*tmp_kernel_op_96;
+             const real_t elMat_0_2 = -tmp_kernel_op_133*tmp_kernel_op_139 - tmp_kernel_op_137*tmp_kernel_op_59 - tmp_kernel_op_138*tmp_kernel_op_96;
+             const real_t elMat_0_3 = tmp_kernel_op_133*tmp_kernel_op_148 + tmp_kernel_op_142*tmp_kernel_op_59 + tmp_kernel_op_145*tmp_kernel_op_96;
+             const real_t elMat_0_4 = tmp_kernel_op_133*tmp_kernel_op_151 + tmp_kernel_op_149*tmp_kernel_op_59 + tmp_kernel_op_150*tmp_kernel_op_96;
+             const real_t elMat_0_5 = tmp_kernel_op_133*tmp_kernel_op_154 + tmp_kernel_op_152*tmp_kernel_op_59 + tmp_kernel_op_153*tmp_kernel_op_96;
+             const real_t elMat_1_0 = tmp_kernel_op_118*tmp_kernel_op_157 + tmp_kernel_op_155*tmp_kernel_op_40 + tmp_kernel_op_156*tmp_kernel_op_81;
+             const real_t elMat_1_1 = -tmp_kernel_op_134*tmp_kernel_op_155 - tmp_kernel_op_135*tmp_kernel_op_156 - tmp_kernel_op_136*tmp_kernel_op_157;
+             const real_t elMat_1_2 = -tmp_kernel_op_137*tmp_kernel_op_155 - tmp_kernel_op_138*tmp_kernel_op_156 - tmp_kernel_op_139*tmp_kernel_op_157;
+             const real_t elMat_1_3 = tmp_kernel_op_142*tmp_kernel_op_155 + tmp_kernel_op_145*tmp_kernel_op_156 + tmp_kernel_op_148*tmp_kernel_op_157;
+             const real_t elMat_1_4 = tmp_kernel_op_149*tmp_kernel_op_155 + tmp_kernel_op_150*tmp_kernel_op_156 + tmp_kernel_op_151*tmp_kernel_op_157;
+             const real_t elMat_1_5 = tmp_kernel_op_152*tmp_kernel_op_155 + tmp_kernel_op_153*tmp_kernel_op_156 + tmp_kernel_op_154*tmp_kernel_op_157;
+             const real_t elMat_2_0 = tmp_kernel_op_118*tmp_kernel_op_160 + tmp_kernel_op_158*tmp_kernel_op_40 + tmp_kernel_op_159*tmp_kernel_op_81;
+             const real_t elMat_2_1 = -tmp_kernel_op_134*tmp_kernel_op_158 - tmp_kernel_op_135*tmp_kernel_op_159 - tmp_kernel_op_136*tmp_kernel_op_160;
+             const real_t elMat_2_2 = -tmp_kernel_op_137*tmp_kernel_op_158 - tmp_kernel_op_138*tmp_kernel_op_159 - tmp_kernel_op_139*tmp_kernel_op_160;
+             const real_t elMat_2_3 = tmp_kernel_op_142*tmp_kernel_op_158 + tmp_kernel_op_145*tmp_kernel_op_159 + tmp_kernel_op_148*tmp_kernel_op_160;
+             const real_t elMat_2_4 = tmp_kernel_op_149*tmp_kernel_op_158 + tmp_kernel_op_150*tmp_kernel_op_159 + tmp_kernel_op_151*tmp_kernel_op_160;
+             const real_t elMat_2_5 = tmp_kernel_op_152*tmp_kernel_op_158 + tmp_kernel_op_153*tmp_kernel_op_159 + tmp_kernel_op_154*tmp_kernel_op_160;
+         
+             std::vector< uint_t > _data_rowIdx( 3 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 18 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/CMakeLists.txt b/operators/epsilon/CMakeLists.txt
index 9b499f0b4470d3f215b0d585b68b2581b78d601e..ed34aa1e02b4a3d6b6d00a381711014e8e9b4213 100644
--- a/operators/epsilon/CMakeLists.txt
+++ b/operators/epsilon/CMakeLists.txt
@@ -1,5 +1,13 @@
 add_library( opgen-epsilon
 
+   P2ElementwiseEpsilonAnnulusMap_0_0.cpp
+   P2ElementwiseEpsilonAnnulusMap_0_0.hpp
+   P2ElementwiseEpsilonAnnulusMap_0_1.cpp
+   P2ElementwiseEpsilonAnnulusMap_0_1.hpp
+   P2ElementwiseEpsilonAnnulusMap_1_0.cpp
+   P2ElementwiseEpsilonAnnulusMap_1_0.hpp
+   P2ElementwiseEpsilonAnnulusMap_1_1.cpp
+   P2ElementwiseEpsilonAnnulusMap_1_1.hpp
    P2ElementwiseEpsilonIcosahedralShellMap_0_0.cpp
    P2ElementwiseEpsilonIcosahedralShellMap_0_0.hpp
    P2ElementwiseEpsilonIcosahedralShellMap_0_1.cpp
@@ -41,6 +49,12 @@ add_library( opgen-epsilon
 if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
    target_sources(opgen-epsilon PRIVATE
 
+      avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp
+      avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
+      avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp
+      avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp
+      avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp
+      avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
       avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp
       avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
       avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp
@@ -71,6 +85,10 @@ if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
       avx/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp
       avx/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp
       avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_macro_2D.cpp
       noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
       noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp
       noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp
@@ -97,6 +115,12 @@ if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
 
    set_source_files_properties(
 
+      avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp
+      avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
+      avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp
+      avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp
+      avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp
+      avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
       avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp
       avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
       avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp
@@ -137,6 +161,16 @@ else()
 
    target_sources(opgen-epsilon PRIVATE
 
+      noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
+      noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_macro_2D.cpp
       noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp
       noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
       noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.cpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8b42120bc7d77a6cfad2782b4a41e38969e8a012
--- /dev/null
+++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.cpp
@@ -0,0 +1,399 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (maybe
+// (related to) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) causing a
+// warning in an internal standard library header (bits/stl_algobase.h). As a
+// workaround, we disable the warning and include this header indirectly through
+// a public header.
+#include <waLBerlaDefinitions.h>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnonnull"
+#endif
+#include <cmath>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#include "P2ElementwiseEpsilonAnnulusMap_0_0.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+P2ElementwiseEpsilonAnnulusMap_0_0::P2ElementwiseEpsilonAnnulusMap_0_0( const std::shared_ptr< PrimitiveStorage >& storage,
+                                                                        size_t                                     minLevel,
+                                                                        size_t                                     maxLevel,
+                                                                        const P2Function< real_t >&                _mu )
+: Operator( storage, minLevel, maxLevel ) // storage and the level range are forwarded unchanged to the Operator base
+, mu( _mu ) // coefficient function; its vertex/edge DoF data is read by apply() and toMatrix()
+{}
+
+void P2ElementwiseEpsilonAnnulusMap_0_0::apply( const P2Function< real_t >& src,
+                                                const P2Function< real_t >& dst,
+                                                uint_t                      level,
+                                                DoFType                     flag,
+                                                UpdateType                  updateType ) const
+{ // Runs the apply_macro_2D kernel on every macro-face of the storage, then communicates dst additively to edges/vertices.
+   this->startTiming( "apply" );
+
+   // Make sure that the halos of src and mu are up-to-date before the kernel reads them.
+   this->timingTree_->start( "pre-communication" );
+   if ( this->storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." ); // storages with macro-cells (3D) are not handled by this operator
+   }
+   else
+   {
+      communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( mu, level, communication::syncDirection_t::LOW2HIGH );
+   }
+   this->timingTree_->stop( "pre-communication" );
+
+   if ( updateType == Replace )
+   {
+      // We need to zero the destination array (including halos).
+      // However, we must not zero out anything that is not flagged with the specified BCs.
+      // Therefore, we first zero out everything that is flagged, and then, later,
+      // the halos of the highest dim primitives (see the per-face loops below).
+      dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." ); // same 3D guard as in the pre-communication step above
+   }
+   else
+   {
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // Raw per-face data pointers (vertex and edge DoFs) of src, dst and mu on this level.
+         real_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muVertex  = face.getData( mu.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muEdge    = face.getData( mu.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         // Zero out dst halos only (vertex DoFs on the face boundary and all non-inner edge DoFs).
+         //
+         // This is also necessary when using update type == Add.
+         // During additive comm we then skip zeroing the data on the lower-dim primitives.
+         for ( const auto& idx : vertexdof::macroface::Iterator( level ) )
+         {
+            if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) )
+            {
+               auto arrayIdx             = vertexdof::macroface::index( level, idx.x(), idx.y() );
+               _data_dstVertex[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+            }
+         }
+         for ( const auto& idx : edgedof::macroface::Iterator( level ) )
+         {
+            for ( const auto& orientation : edgedof::faceLocalEdgeDoFOrientations )
+            {
+               if ( !edgedof::macroface::isInnerEdgeDoF( level, idx, orientation ) )
+               {
+                  auto arrayIdx           = edgedof::macroface::index( level, idx.x(), idx.y(), orientation );
+                  _data_dstEdge[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+               }
+            }
+         }
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); // same value as above, as real_t
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         WALBERLA_CHECK_NOT_NULLPTR(
+             std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ),
+             "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." ) // NOTE(review): message says "macro-cell" but the map is checked per macro-face here
+         real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex(); // AnnulusMap parameters of this face, passed to the kernel
+         real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex();
+         real_t refVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0];
+         real_t rayVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0];
+         real_t thrVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0];
+         real_t refVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1];
+         real_t rayVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1];
+         real_t thrVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+
+         apply_macro_2D(
+
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_muEdge,
+             _data_muVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+
+      // Push result to lower-dimensional primitives
+      //
+      this->timingTree_->start( "post-communication" );
+      // Note: We could avoid communication here by implementing the apply() also for the respective
+      //       lower dimensional primitives!
+      dst.getVertexDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getVertexDoFFunction().communicateAdditively< Face, Vertex >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getEdgeDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      this->timingTree_->stop( "post-communication" );
+   }
+
+   this->stopTiming( "apply" );
+}
+void P2ElementwiseEpsilonAnnulusMap_0_0::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                                                   const P2Function< idx_t >&                  src,
+                                                   const P2Function< idx_t >&                  dst,
+                                                   uint_t                                      level,
+                                                   DoFType                                     flag ) const
+{ // Calls the toMatrix_macro_2D kernel on every macro-face, handing it mat plus the src/dst index data and the mu values.
+   this->startTiming( "toMatrix" );
+
+   // The provided flag is currently ignored; a warning is logged on root if it is not All.
+   if ( flag != All )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      this->timingTree_->start( "pre-communication" );
+      mu.communicate< Face, Cell >( level );
+      mu.communicate< Edge, Cell >( level );
+      mu.communicate< Vertex, Cell >( level );
+      this->timingTree_->stop( "pre-communication" );
+
+      WALBERLA_ABORT( "Not implemented." ); // NOTE(review): mu is communicated above although the 3D path always aborts here
+   }
+   else
+   {
+      this->timingTree_->start( "pre-communication" );
+      communication::syncFunctionBetweenPrimitives( mu, level, communication::syncDirection_t::LOW2HIGH );
+      this->timingTree_->stop( "pre-communication" );
+
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // Raw per-face data pointers: src/dst carry idx_t data, mu carries real_t data.
+         idx_t*  _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muVertex  = face.getData( mu.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muEdge    = face.getData( mu.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); // same value as above, as real_t
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         WALBERLA_CHECK_NOT_NULLPTR(
+             std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ),
+             "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." ) // NOTE(review): message says "macro-cell" but the map is checked per macro-face here
+         real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex(); // AnnulusMap parameters of this face, passed to the kernel
+         real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex();
+         real_t refVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0];
+         real_t rayVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0];
+         real_t thrVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0];
+         real_t refVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1];
+         real_t rayVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1];
+         real_t thrVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+
+         toMatrix_macro_2D(
+
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_muEdge,
+             _data_muVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             mat,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+   }
+   this->stopTiming( "toMatrix" );
+}
+// Computes the inverse diagonal values into invDiag_ for every level in [minLevel_, maxLevel_], creating invDiag_ on first use.
+// Only the macro-face (2D) path is implemented; a storage with global cells aborts with "Not implemented.".
+void P2ElementwiseEpsilonAnnulusMap_0_0::computeInverseDiagonalOperatorValues()
+{
+   this->startTiming( "computeInverseDiagonalOperatorValues" );
+
+   if ( invDiag_ == nullptr )
+   {
+      invDiag_ = std::make_shared< P2Function< real_t > >( "inverse diagonal entries", storage_, minLevel_, maxLevel_ );
+   }
+
+   for ( uint_t level = minLevel_; level <= maxLevel_; level++ )
+   {
+      invDiag_->setToZero( level );
+
+      if ( storage_->hasGlobalCells() )
+      {
+         this->timingTree_->start( "pre-communication" );
+         mu.communicate< Face, Cell >( level );
+         mu.communicate< Edge, Cell >( level );
+         mu.communicate< Vertex, Cell >( level );
+         this->timingTree_->stop( "pre-communication" );
+
+         WALBERLA_ABORT( "Not implemented." ); // no macro-cell kernel is called in this function
+      }
+      else
+      {
+         this->timingTree_->start( "pre-communication" );
+         communication::syncFunctionBetweenPrimitives( mu, level, communication::syncDirection_t::LOW2HIGH );
+         this->timingTree_->stop( "pre-communication" );
+
+         for ( auto& it : storage_->getFaces() )
+         {
+            Face& face = *it.second;
+
+            // raw per-face data of the result (invDiag_) and of the coefficient (mu) on this level
+            real_t* _data_invDiag_Vertex =
+                face.getData( ( *invDiag_ ).getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_invDiag_Edge = face.getData( ( *invDiag_ ).getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_muVertex     = face.getData( mu.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_muEdge       = face.getData( mu.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+            const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+            const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+            const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+            const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+            const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+            const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+            const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+            const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+            // Cast the geometry map once; the same pointer is null-checked and then used for all parameter reads.
+            const auto annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+            WALBERLA_CHECK_NOT_NULLPTR( annulusMap, "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+            real_t radRefVertex = annulusMap->radRefVertex();
+            real_t radRayVertex = annulusMap->radRayVertex();
+            real_t refVertex_0  = annulusMap->refVertex()[0];
+            real_t rayVertex_0  = annulusMap->rayVertex()[0];
+            real_t thrVertex_0  = annulusMap->thrVertex()[0];
+            real_t refVertex_1  = annulusMap->refVertex()[1];
+            real_t rayVertex_1  = annulusMap->rayVertex()[1];
+            real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+            this->timingTree_->start( "kernel" );
+
+            computeInverseDiagonalOperatorValues_macro_2D(
+                _data_invDiag_Edge,
+                _data_invDiag_Vertex,
+                _data_muEdge,
+                _data_muVertex,
+                macro_vertex_coord_id_0comp0,
+                macro_vertex_coord_id_0comp1,
+                macro_vertex_coord_id_1comp0,
+                macro_vertex_coord_id_1comp1,
+                macro_vertex_coord_id_2comp0,
+                macro_vertex_coord_id_2comp1,
+                micro_edges_per_macro_edge,
+                micro_edges_per_macro_edge_float,
+                radRayVertex,
+                radRefVertex,
+                rayVertex_0,
+                rayVertex_1,
+                refVertex_0,
+                refVertex_1,
+                thrVertex_0,
+                thrVertex_1 );
+            this->timingTree_->stop( "kernel" );
+         }
+
+         // Push result to lower-dimensional primitives (additive communication from the faces to edges and vertices)
+         this->timingTree_->start( "post-communication" );
+         // Note: We could avoid communication here by implementing the apply() also for the respective
+         //       lower dimensional primitives!
+         ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Edge >( level );
+         ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Vertex >( level );
+         ( *invDiag_ ).getEdgeDoFFunction().communicateAdditively< Face, Edge >( level );
+         this->timingTree_->stop( "post-communication" );
+      }
+
+      ( *invDiag_ ).invertElementwise( level ); // last step per level: element-wise inversion of the values gathered above
+   }
+
+   this->stopTiming( "computeInverseDiagonalOperatorValues" );
+}
+std::shared_ptr< P2Function< real_t > > P2ElementwiseEpsilonAnnulusMap_0_0::getInverseDiagonalValues() const
+{
+   return this->invDiag_; // hands out the stored pointer as is; nothing is computed or allocated here
+}
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.hpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..d18522d9718fd4434870608bb8022d68e6baea23
--- /dev/null
+++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.hpp
@@ -0,0 +1,182 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+#include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/communication/Syncing.hpp"
+#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
+#include "hyteg/geometry/AnnulusMap.hpp"
+#include "hyteg/operators/Operator.hpp"
+#include "hyteg/p2functionspace/P2Function.hpp"
+#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
+#include "hyteg/solvers/Smoothables.hpp"
+#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+/// "Epsilon" operator.
+///
+/// Component trial: 0
+/// Component test:  0
+/// Geometry map:    AnnulusMap
+///
+/// Weak formulation
+///
+///     u: trial function (vectorial space: Lagrange, degree: 2)
+///     v: test function  (vectorial space: Lagrange, degree: 2)
+///     μ: coefficient    (scalar space:    Lagrange, degree: 2)
+///
+///     ∫ 2 μ ε(u) : ε(v)
+///
+/// where
+///
+///     ε(w) := (1/2) (∇w + (∇w)ᵀ)
+
+class P2ElementwiseEpsilonAnnulusMap_0_0 : public Operator< P2Function< real_t >, P2Function< real_t > >,
+                                           public OperatorWithInverseDiagonal< P2Function< real_t > >
+{
+ public:
+   P2ElementwiseEpsilonAnnulusMap_0_0( const std::shared_ptr< PrimitiveStorage >& storage,
+                                       size_t                                     minLevel,
+                                       size_t                                     maxLevel,
+                                       const P2Function< real_t >&                _mu ); // _mu: presumably the coefficient μ of the weak form - confirm in the .cpp
+
+   void apply( const P2Function< real_t >& src,
+               const P2Function< real_t >& dst,
+               uint_t                      level,
+               DoFType                     flag,
+               UpdateType                  updateType = Replace ) const;
+
+   void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                  const P2Function< idx_t >&                  src,
+                  const P2Function< idx_t >&                  dst,
+                  uint_t                                      level,
+                  DoFType                                     flag ) const; // src/dst are idx_t-valued functions; mat is forwarded to toMatrix_macro_2D
+
+   void computeInverseDiagonalOperatorValues(); ///< Fills invDiag_ on all levels from minLevel_ to maxLevel_ (creates it if it is still null).
+
+   std::shared_ptr< P2Function< real_t > > getInverseDiagonalValues() const; ///< Returns the invDiag_ pointer unchanged; triggers no computation.
+
+ protected: // no protected members
+ private:
+   /// Kernel type: apply
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    660     924      24      16      4              0                 0              1
+   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
+                        real_t* RESTRICT _data_dstVertex,
+                        real_t* RESTRICT _data_muEdge,
+                        real_t* RESTRICT _data_muVertex,
+                        real_t* RESTRICT _data_srcEdge,
+                        real_t* RESTRICT _data_srcVertex,
+                        real_t           macro_vertex_coord_id_0comp0,
+                        real_t           macro_vertex_coord_id_0comp1,
+                        real_t           macro_vertex_coord_id_1comp0,
+                        real_t           macro_vertex_coord_id_1comp1,
+                        real_t           macro_vertex_coord_id_2comp0,
+                        real_t           macro_vertex_coord_id_2comp1,
+                        int64_t          micro_edges_per_macro_edge,
+                        real_t           micro_edges_per_macro_edge_float,
+                        real_t           radRayVertex,
+                        real_t           radRefVertex,
+                        real_t           rayVertex_0,
+                        real_t           rayVertex_1,
+                        real_t           refVertex_0,
+                        real_t           refVertex_1,
+                        real_t           thrVertex_0,
+                        real_t           thrVertex_1 ) const;
+   /// Kernel type: toMatrix (called from toMatrix() with the raw per-face data pointers)
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    624     888      24      16      4              0                 0              4
+   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                           idx_t* RESTRICT                      _data_dstVertex,
+                           real_t* RESTRICT                     _data_muEdge,
+                           real_t* RESTRICT                     _data_muVertex,
+                           idx_t* RESTRICT                      _data_srcEdge,
+                           idx_t* RESTRICT                      _data_srcVertex,
+                           real_t                               macro_vertex_coord_id_0comp0,
+                           real_t                               macro_vertex_coord_id_0comp1,
+                           real_t                               macro_vertex_coord_id_1comp0,
+                           real_t                               macro_vertex_coord_id_1comp1,
+                           real_t                               macro_vertex_coord_id_2comp0,
+                           real_t                               macro_vertex_coord_id_2comp1,
+                           std::shared_ptr< SparseMatrixProxy > mat,
+                           int64_t                              micro_edges_per_macro_edge,
+                           real_t                               micro_edges_per_macro_edge_float,
+                           real_t                               radRayVertex,
+                           real_t                               radRefVertex,
+                           real_t                               rayVertex_0,
+                           real_t                               rayVertex_1,
+                           real_t                               refVertex_0,
+                           real_t                               refVertex_1,
+                           real_t                               thrVertex_0,
+                           real_t                               thrVertex_1 ) const;
+   /// Kernel type: computeInverseDiagonalOperatorValues (called once per macro-face from computeInverseDiagonalOperatorValues())
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    450     708      24      16      4              0                 0              1
+   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
+                                                       real_t* RESTRICT _data_invDiag_Vertex,
+                                                       real_t* RESTRICT _data_muEdge,
+                                                       real_t* RESTRICT _data_muVertex,
+                                                       real_t           macro_vertex_coord_id_0comp0,
+                                                       real_t           macro_vertex_coord_id_0comp1,
+                                                       real_t           macro_vertex_coord_id_1comp0,
+                                                       real_t           macro_vertex_coord_id_1comp1,
+                                                       real_t           macro_vertex_coord_id_2comp0,
+                                                       real_t           macro_vertex_coord_id_2comp1,
+                                                       int64_t          micro_edges_per_macro_edge,
+                                                       real_t           micro_edges_per_macro_edge_float,
+                                                       real_t           radRayVertex,
+                                                       real_t           radRefVertex,
+                                                       real_t           rayVertex_0,
+                                                       real_t           rayVertex_1,
+                                                       real_t           refVertex_0,
+                                                       real_t           refVertex_1,
+                                                       real_t           thrVertex_0,
+                                                       real_t           thrVertex_1 ) const;
+
+   std::shared_ptr< P2Function< real_t > > invDiag_; ///< Created on first call of computeInverseDiagonalOperatorValues(); returned by getInverseDiagonalValues().
+   P2Function< real_t >                    mu;       ///< Coefficient function, held by value; its face data is passed to the kernels as _data_muVertex/_data_muEdge.
+};
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.cpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f5ebbda6e556ac86df571d85f9d74d085e83b937
--- /dev/null
+++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.cpp
@@ -0,0 +1,292 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (maybe
+// (related to) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) causing a
+// warning in an internal standard library header (bits/stl_algobase.h). As a
+// workaround, we disable the warning and include this header indirectly through
+// a public header.
+#include <waLBerlaDefinitions.h>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnonnull"
+#endif
+#include <cmath>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#include "P2ElementwiseEpsilonAnnulusMap_0_1.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+P2ElementwiseEpsilonAnnulusMap_0_1::P2ElementwiseEpsilonAnnulusMap_0_1( const std::shared_ptr< PrimitiveStorage >& storage,
+                                                                        size_t                                     minLevel,
+                                                                        size_t                                     maxLevel,
+                                                                        const P2Function< real_t >&                _mu )
+: Operator< P2Function< real_t >, P2Function< real_t > >( storage, minLevel, maxLevel ) // base operator receives storage and level range
+, mu( _mu )                                                                             // coefficient function read by apply() and toMatrix()
+{}
+
+// Applies the operator on `level`: runs the apply kernel once per macro-face and then communicates dst additively to the
+// lower-dimensional primitives. With updateType == Replace, dst is first set to zero on everything selected by `flag`.
+// Only the macro-face (2D) path is implemented; a storage with global cells aborts with "Not implemented.".
+void P2ElementwiseEpsilonAnnulusMap_0_1::apply( const P2Function< real_t >& src,
+                                                const P2Function< real_t >& dst,
+                                                uint_t                      level,
+                                                DoFType                     flag,
+                                                UpdateType                  updateType ) const
+{
+   this->startTiming( "apply" );
+
+   // Make sure that halos are up-to-date
+   this->timingTree_->start( "pre-communication" );
+   if ( this->storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( mu, level, communication::syncDirection_t::LOW2HIGH );
+   }
+   this->timingTree_->stop( "pre-communication" );
+
+   if ( updateType == Replace )
+   {
+      // We need to zero the destination array (including halos).
+      // However, we must not zero out anything that is not flagged with the specified BCs.
+      // Therefore, we first zero out everything that is flagged, and then, later,
+      // the halos of the highest dim primitives.
+      dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data in the functions
+         real_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muVertex  = face.getData( mu.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muEdge    = face.getData( mu.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         // Zero out dst halos only (vertex DoFs on the face boundary and all non-inner edge DoFs).
+         // This is also necessary when using update type == Add.
+         // During additive comm we then skip zeroing the data on the lower-dim primitives.
+         for ( const auto& idx : vertexdof::macroface::Iterator( level ) )
+         {
+            if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) )
+            {
+               auto arrayIdx             = vertexdof::macroface::index( level, idx.x(), idx.y() );
+               _data_dstVertex[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+            }
+         }
+         for ( const auto& idx : edgedof::macroface::Iterator( level ) )
+         {
+            for ( const auto& orientation : edgedof::faceLocalEdgeDoFOrientations )
+            {
+               if ( !edgedof::macroface::isInnerEdgeDoF( level, idx, orientation ) )
+               {
+                  auto arrayIdx           = edgedof::macroface::index( level, idx.x(), idx.y(), orientation );
+                  _data_dstEdge[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+               }
+            }
+         }
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         // Cast the geometry map once; the same pointer is null-checked and then used for all parameter reads.
+         const auto annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+         WALBERLA_CHECK_NOT_NULLPTR( annulusMap, "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         real_t radRefVertex = annulusMap->radRefVertex();
+         real_t radRayVertex = annulusMap->radRayVertex();
+         real_t refVertex_0  = annulusMap->refVertex()[0];
+         real_t rayVertex_0  = annulusMap->rayVertex()[0];
+         real_t thrVertex_0  = annulusMap->thrVertex()[0];
+         real_t refVertex_1  = annulusMap->refVertex()[1];
+         real_t rayVertex_1  = annulusMap->rayVertex()[1];
+         real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+
+         apply_macro_2D(
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_muEdge,
+             _data_muVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+
+      // Push result to lower-dimensional primitives
+      this->timingTree_->start( "post-communication" );
+      // Note: We could avoid communication here by implementing the apply() also for the respective
+      //       lower dimensional primitives!
+      dst.getVertexDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getVertexDoFFunction().communicateAdditively< Face, Vertex >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getEdgeDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      this->timingTree_->stop( "post-communication" );
+   }
+
+   this->stopTiming( "apply" );
+}
+// Assembles the operator into `mat` on `level` by running the toMatrix kernel once per macro-face; src and dst are idx_t-valued.
+// `flag` is ignored (a warning is logged if it is not All); a storage with global cells aborts with "Not implemented.".
+void P2ElementwiseEpsilonAnnulusMap_0_1::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                                                   const P2Function< idx_t >&                  src,
+                                                   const P2Function< idx_t >&                  dst,
+                                                   uint_t                                      level,
+                                                   DoFType                                     flag ) const
+{
+   this->startTiming( "toMatrix" );
+
+   // We currently ignore the flag provided!
+   if ( flag != All )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      this->timingTree_->start( "pre-communication" );
+      mu.communicate< Face, Cell >( level );
+      mu.communicate< Edge, Cell >( level );
+      mu.communicate< Vertex, Cell >( level );
+      this->timingTree_->stop( "pre-communication" );
+
+      WALBERLA_ABORT( "Not implemented." ); // no macro-cell kernel is called in this function
+   }
+   else
+   {
+      this->timingTree_->start( "pre-communication" );
+      communication::syncFunctionBetweenPrimitives( mu, level, communication::syncDirection_t::LOW2HIGH );
+      this->timingTree_->stop( "pre-communication" );
+
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data (idx_t data of src/dst, real_t data of the coefficient mu)
+         idx_t*  _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muVertex  = face.getData( mu.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muEdge    = face.getData( mu.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         // Cast the geometry map once; the same pointer is null-checked and then used for all parameter reads.
+         const auto annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+         WALBERLA_CHECK_NOT_NULLPTR( annulusMap, "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         real_t radRefVertex = annulusMap->radRefVertex();
+         real_t radRayVertex = annulusMap->radRayVertex();
+         real_t refVertex_0  = annulusMap->refVertex()[0];
+         real_t rayVertex_0  = annulusMap->rayVertex()[0];
+         real_t thrVertex_0  = annulusMap->thrVertex()[0];
+         real_t refVertex_1  = annulusMap->refVertex()[1];
+         real_t rayVertex_1  = annulusMap->rayVertex()[1];
+         real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+         toMatrix_macro_2D(
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_muEdge,
+             _data_muVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             mat,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+   }
+   this->stopTiming( "toMatrix" );
+}
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.hpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..fa9c460812c298c2cb5537c9aee7210ef1e393d2
--- /dev/null
+++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.hpp
@@ -0,0 +1,149 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+#include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/communication/Syncing.hpp"
+#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
+#include "hyteg/geometry/AnnulusMap.hpp"
+#include "hyteg/operators/Operator.hpp"
+#include "hyteg/p2functionspace/P2Function.hpp"
+#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
+#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+/// "Epsilon" operator.
+///
+/// Component trial: 1
+/// Component test:  0
+/// Geometry map:    AnnulusMap
+///
+/// Weak formulation
+///
+///     u: trial function (vectorial space: Lagrange, degree: 2)
+///     v: test function  (vectorial space: Lagrange, degree: 2)
+///     μ: coefficient    (scalar space:    Lagrange, degree: 2)
+///
+///     ∫ 2 μ ε(u) : ε(v)
+///
+/// where
+///
+///     ε(w) := (1/2) (∇w + (∇w)ᵀ)
+
+class P2ElementwiseEpsilonAnnulusMap_0_1 : public Operator< P2Function< real_t >, P2Function< real_t > >
+{ // Public: constructor, apply(), toMatrix(). Private: two generated 2D kernels and the coefficient member `mu`.
+ public:
+   P2ElementwiseEpsilonAnnulusMap_0_1( const std::shared_ptr< PrimitiveStorage >& storage,
+                                       size_t                                     minLevel,
+                                       size_t                                     maxLevel,
+                                       const P2Function< real_t >&                _mu ); // coefficient; presumably the μ of the weak form
+
+   void apply( const P2Function< real_t >& src,
+               const P2Function< real_t >& dst,
+               uint_t                      level,
+               DoFType                     flag,
+               UpdateType                  updateType = Replace ) const; // updateType may be omitted; it defaults to Replace
+
+   void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                  const P2Function< idx_t >&                  src, // src and dst are idx_t-valued functions here,
+                  const P2Function< idx_t >&                  dst, // not the real_t-valued ones that apply() takes
+                  uint_t                                      level,
+                  DoFType                                     flag ) const;
+
+ protected: // (no protected members are declared)
+ private:
+   /// Kernel type: apply (declaration only; the kernel body is not part of this header)
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    840     984      24      16      4              0                 0              1
+   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
+                        real_t* RESTRICT _data_dstVertex,
+                        real_t* RESTRICT _data_muEdge,
+                        real_t* RESTRICT _data_muVertex,
+                        real_t* RESTRICT _data_srcEdge,
+                        real_t* RESTRICT _data_srcVertex,
+                        real_t           macro_vertex_coord_id_0comp0,
+                        real_t           macro_vertex_coord_id_0comp1,
+                        real_t           macro_vertex_coord_id_1comp0,
+                        real_t           macro_vertex_coord_id_1comp1,
+                        real_t           macro_vertex_coord_id_2comp0,
+                        real_t           macro_vertex_coord_id_2comp1,
+                        int64_t          micro_edges_per_macro_edge,
+                        real_t           micro_edges_per_macro_edge_float, // NOTE(review): presumably the same count as real_t
+                        real_t           radRayVertex, // rad*/ray*/ref*/thr*: presumably AnnulusMap parameters - TODO confirm in the .cpp
+                        real_t           radRefVertex,
+                        real_t           rayVertex_0,
+                        real_t           rayVertex_1,
+                        real_t           refVertex_0,
+                        real_t           refVertex_1,
+                        real_t           thrVertex_0,
+                        real_t           thrVertex_1 ) const;
+   /// Kernel type: toMatrix (declaration only; the kernel body is not part of this header)
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    804     948      24      16      4              0                 0              4
+   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge, // dst/src arrays are idx_t in this kernel;
+                           idx_t* RESTRICT                      _data_dstVertex,
+                           real_t* RESTRICT                     _data_muEdge, // the mu arrays stay real_t
+                           real_t* RESTRICT                     _data_muVertex,
+                           idx_t* RESTRICT                      _data_srcEdge,
+                           idx_t* RESTRICT                      _data_srcVertex,
+                           real_t                               macro_vertex_coord_id_0comp0,
+                           real_t                               macro_vertex_coord_id_0comp1,
+                           real_t                               macro_vertex_coord_id_1comp0,
+                           real_t                               macro_vertex_coord_id_1comp1,
+                           real_t                               macro_vertex_coord_id_2comp0,
+                           real_t                               macro_vertex_coord_id_2comp1,
+                           std::shared_ptr< SparseMatrixProxy > mat, // taken by value (a shared_ptr copy per call)
+                           int64_t                              micro_edges_per_macro_edge,
+                           real_t                               micro_edges_per_macro_edge_float,
+                           real_t                               radRayVertex,
+                           real_t                               radRefVertex,
+                           real_t                               rayVertex_0,
+                           real_t                               rayVertex_1,
+                           real_t                               refVertex_0,
+                           real_t                               refVertex_1,
+                           real_t                               thrVertex_0,
+                           real_t                               thrVertex_1 ) const;
+
+   P2Function< real_t > mu; // coefficient function, held by value (not a reference or pointer)
+};
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.cpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3a136be0f9c0f4d4cc15aa470910f7863c3d322c
--- /dev/null
+++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.cpp
@@ -0,0 +1,292 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (maybe
+// (related to) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) causing a
+// warning in an internal standard library header (bits/stl_algobase.h). As a
+// workaround, we disable the warning and include this header indirectly through
+// a public header.
+#include <waLBerlaDefinitions.h>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnonnull"
+#endif
+#include <cmath>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#include "P2ElementwiseEpsilonAnnulusMap_1_0.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+P2ElementwiseEpsilonAnnulusMap_1_0::P2ElementwiseEpsilonAnnulusMap_1_0( const std::shared_ptr< PrimitiveStorage >& storage,
+                                                                        size_t                                     minLevel,
+                                                                        size_t                                     maxLevel,
+                                                                        const P2Function< real_t >&                _mu )
+: Operator( storage, minLevel, maxLevel ) // storage and level range are handed straight to the Operator base class
+, mu( _mu ) // member `mu` is initialised from the caller's coefficient function
+{} // empty body: construction only forwards its arguments
+
+void P2ElementwiseEpsilonAnnulusMap_1_0::apply( const P2Function< real_t >& src,
+                                                const P2Function< real_t >& dst,
+                                                uint_t                      level,
+                                                DoFType                     flag,
+                                                UpdateType                  updateType ) const
+{ // Runs the generated apply kernel on every macro-face (src as input, dst as destination); only the 2D path is implemented.
+   this->startTiming( "apply" );
+
+   // Make sure that halos are up-to-date
+   this->timingTree_->start( "pre-communication" );
+   if ( this->storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." ); // storages that contain macro-cells are rejected
+   }
+   else // both the input function and the coefficient mu are synced before any kernel runs
+   {
+      communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( mu, level, communication::syncDirection_t::LOW2HIGH );
+   }
+   this->timingTree_->stop( "pre-communication" );
+
+   if ( updateType == Replace )
+   {
+      // We need to zero the destination array (including halos).
+      // However, we must not zero out anything that is not flagged with the specified BCs.
+      // Therefore, we first zero out everything that is flagged, and then, later,
+      // the halos of the highest dim primitives.
+      dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag );
+   }
+
+   if ( storage_->hasGlobalCells() ) // same check as above, now guarding the compute phase
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      for ( auto& it : storage_->getFaces() ) // one kernel call per macro-face
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data in the functions
+         real_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muVertex  = face.getData( mu.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muEdge    = face.getData( mu.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         // Zero out dst halos only
+         //
+         // This is also necessary when using update type == Add.
+         // During additive comm we then skip zeroing the data on the lower-dim primitives.
+         for ( const auto& idx : vertexdof::macroface::Iterator( level ) )
+         {
+            if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) )
+            {
+               auto arrayIdx             = vertexdof::macroface::index( level, idx.x(), idx.y() );
+               _data_dstVertex[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+            }
+         }
+         for ( const auto& idx : edgedof::macroface::Iterator( level ) )
+         {
+            for ( const auto& orientation : edgedof::faceLocalEdgeDoFOrientations )
+            {
+               if ( !edgedof::macroface::isInnerEdgeDoF( level, idx, orientation ) )
+               {
+                  auto arrayIdx           = edgedof::macroface::index( level, idx.x(), idx.y(), orientation );
+                  _data_dstEdge[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+               }
+            }
+         }
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); // same count, as real_t
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         WALBERLA_CHECK_NOT_NULLPTR( // NOTE(review): this cast is repeated for every accessor below; one local would suffice
+             std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ), // checked per face (message says "macro-cell")
+             "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex();
+         real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex();
+         real_t refVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0];
+         real_t rayVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0];
+         real_t thrVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0];
+         real_t refVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1];
+         real_t rayVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1];
+         real_t thrVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+
+         apply_macro_2D( // arguments follow the parameter order of the declaration in the header
+
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_muEdge,
+             _data_muVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+
+      // Push result to lower-dimensional primitives
+      // (vertex DoFs: Face -> Edge and Face -> Vertex; edge DoFs: Face -> Edge)
+      this->timingTree_->start( "post-communication" );
+      // Note: We could avoid communication here by implementing the apply() also for the respective
+      //       lower dimensional primitives!
+      dst.getVertexDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace ); // NOTE(review): All ^ flag presumably = types not in flag
+      dst.getVertexDoFFunction().communicateAdditively< Face, Vertex >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getEdgeDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      this->timingTree_->stop( "post-communication" );
+   }
+
+   this->stopTiming( "apply" );
+}
+void P2ElementwiseEpsilonAnnulusMap_1_0::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                                                   const P2Function< idx_t >&                  src,
+                                                   const P2Function< idx_t >&                  dst,
+                                                   uint_t                                      level,
+                                                   DoFType                                     flag ) const
+{ // Runs the generated toMatrix kernel on every macro-face, handing it `mat` and the idx_t src/dst arrays; 2D only.
+   this->startTiming( "toMatrix" );
+
+   // We currently ignore the flag provided!
+   if ( flag != All )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" );
+   }
+
+   if ( storage_->hasGlobalCells() ) // storages with macro-cells: mu is communicated, then the call aborts
+   {
+      this->timingTree_->start( "pre-communication" );
+      mu.communicate< Face, Cell >( level );
+      mu.communicate< Edge, Cell >( level );
+      mu.communicate< Vertex, Cell >( level );
+      this->timingTree_->stop( "pre-communication" );
+
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      this->timingTree_->start( "pre-communication" ); // only the coefficient mu is synced in this path
+      communication::syncFunctionBetweenPrimitives( mu, level, communication::syncDirection_t::LOW2HIGH );
+      this->timingTree_->stop( "pre-communication" );
+
+      for ( auto& it : storage_->getFaces() ) // one kernel call per macro-face
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data
+         idx_t*  _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muVertex  = face.getData( mu.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muEdge    = face.getData( mu.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); // same count, as real_t
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         WALBERLA_CHECK_NOT_NULLPTR( // NOTE(review): this cast is repeated for every accessor below; one local would suffice
+             std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ), // checked per face (message says "macro-cell")
+             "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex();
+         real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex();
+         real_t refVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0];
+         real_t rayVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0];
+         real_t thrVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0];
+         real_t refVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1];
+         real_t rayVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1];
+         real_t thrVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+
+         toMatrix_macro_2D( // arguments follow the parameter order of the declaration in the header
+
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_muEdge,
+             _data_muVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             mat,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+   }
+   this->stopTiming( "toMatrix" );
+}
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.hpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ee7a1f4ab08ad3a0aeeefa80b3ee8f36bd1b6856
--- /dev/null
+++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.hpp
@@ -0,0 +1,149 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+#include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/communication/Syncing.hpp"
+#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
+#include "hyteg/geometry/AnnulusMap.hpp"
+#include "hyteg/operators/Operator.hpp"
+#include "hyteg/p2functionspace/P2Function.hpp"
+#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
+#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+/// "Epsilon" operator.
+///
+/// Component trial: 0
+/// Component test:  1
+/// Geometry map:    AnnulusMap
+///
+/// Weak formulation
+///
+///     u: trial function (vectorial space: Lagrange, degree: 2)
+///     v: test function  (vectorial space: Lagrange, degree: 2)
+///     μ: coefficient    (scalar space:    Lagrange, degree: 2)
+///
+///     ∫ 2 μ ε(u) : ε(v)
+///
+/// where
+///
+///     ε(w) := (1/2) (∇w + (∇w)ᵀ)
+
+class P2ElementwiseEpsilonAnnulusMap_1_0 : public Operator< P2Function< real_t >, P2Function< real_t > >
+{ // Public: constructor, apply(), toMatrix(). Private: two generated 2D kernels and the coefficient member `mu`.
+ public:
+   P2ElementwiseEpsilonAnnulusMap_1_0( const std::shared_ptr< PrimitiveStorage >& storage,
+                                       size_t                                     minLevel,
+                                       size_t                                     maxLevel,
+                                       const P2Function< real_t >&                _mu ); // coefficient function; the .cpp initialises member mu from it
+
+   void apply( const P2Function< real_t >& src,
+               const P2Function< real_t >& dst, // const reference, yet its DoF data is what apply() zeroes and hands to the kernel
+               uint_t                      level,
+               DoFType                     flag,
+               UpdateType                  updateType = Replace ) const; // updateType may be omitted; it defaults to Replace
+
+   void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                  const P2Function< idx_t >&                  src, // src and dst are idx_t-valued functions here,
+                  const P2Function< idx_t >&                  dst, // not the real_t-valued ones that apply() takes
+                  uint_t                                      level,
+                  DoFType                                     flag ) const; // the .cpp warns and ignores flag unless it equals All
+
+ protected: // (no protected members are declared)
+ private:
+   /// Kernel type: apply (declaration only; apply() in the .cpp calls it once per macro-face)
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    840     984      24      16      4              0                 0              1
+   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
+                        real_t* RESTRICT _data_dstVertex,
+                        real_t* RESTRICT _data_muEdge,
+                        real_t* RESTRICT _data_muVertex,
+                        real_t* RESTRICT _data_srcEdge,
+                        real_t* RESTRICT _data_srcVertex,
+                        real_t           macro_vertex_coord_id_0comp0,
+                        real_t           macro_vertex_coord_id_0comp1,
+                        real_t           macro_vertex_coord_id_1comp0,
+                        real_t           macro_vertex_coord_id_1comp1,
+                        real_t           macro_vertex_coord_id_2comp0,
+                        real_t           macro_vertex_coord_id_2comp1,
+                        int64_t          micro_edges_per_macro_edge,
+                        real_t           micro_edges_per_macro_edge_float, // the .cpp passes the same count, cast to real_t
+                        real_t           radRayVertex, // rad*/ray*/ref*/thr*: read from the face's AnnulusMap in the .cpp
+                        real_t           radRefVertex,
+                        real_t           rayVertex_0,
+                        real_t           rayVertex_1,
+                        real_t           refVertex_0,
+                        real_t           refVertex_1,
+                        real_t           thrVertex_0,
+                        real_t           thrVertex_1 ) const;
+   /// Kernel type: toMatrix (declaration only; toMatrix() in the .cpp calls it once per macro-face)
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    804     948      24      16      4              0                 0              4
+   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge, // dst/src arrays are idx_t in this kernel;
+                           idx_t* RESTRICT                      _data_dstVertex,
+                           real_t* RESTRICT                     _data_muEdge, // the mu arrays stay real_t
+                           real_t* RESTRICT                     _data_muVertex,
+                           idx_t* RESTRICT                      _data_srcEdge,
+                           idx_t* RESTRICT                      _data_srcVertex,
+                           real_t                               macro_vertex_coord_id_0comp0,
+                           real_t                               macro_vertex_coord_id_0comp1,
+                           real_t                               macro_vertex_coord_id_1comp0,
+                           real_t                               macro_vertex_coord_id_1comp1,
+                           real_t                               macro_vertex_coord_id_2comp0,
+                           real_t                               macro_vertex_coord_id_2comp1,
+                           std::shared_ptr< SparseMatrixProxy > mat, // taken by value (a shared_ptr copy per call)
+                           int64_t                              micro_edges_per_macro_edge,
+                           real_t                               micro_edges_per_macro_edge_float,
+                           real_t                               radRayVertex,
+                           real_t                               radRefVertex,
+                           real_t                               rayVertex_0,
+                           real_t                               rayVertex_1,
+                           real_t                               refVertex_0,
+                           real_t                               refVertex_1,
+                           real_t                               thrVertex_0,
+                           real_t                               thrVertex_1 ) const;
+
+   P2Function< real_t > mu; // coefficient function, held by value (not a reference or pointer)
+};
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.cpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e05cb414709e321870d334ca62371b66f5867e69
--- /dev/null
+++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.cpp
@@ -0,0 +1,399 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (maybe
+// (related to) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) causing a
+// warning in an internal standard library header (bits/stl_algobase.h). As a
+// workaround, we disable the warning and include this header indirectly through
+// a public header.
+#include <waLBerlaDefinitions.h>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnonnull"
+#endif
+#include <cmath>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#include "P2ElementwiseEpsilonAnnulusMap_1_1.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+P2ElementwiseEpsilonAnnulusMap_1_1::P2ElementwiseEpsilonAnnulusMap_1_1( const std::shared_ptr< PrimitiveStorage >& storage,
+                                                                        size_t                                     minLevel,
+                                                                        size_t                                     maxLevel,
+                                                                        const P2Function< real_t >&                _mu )
+: Operator( storage, minLevel, maxLevel ) // storage and level range are handed straight to the Operator base class
+, mu( _mu ) // member `mu` is initialised from the caller's coefficient function
+{} // empty body: construction only forwards its arguments
+
+/// Applies the operator on the given level by running the generated kernel apply_macro_2D on every macro-face with the
+/// data of src, mu and dst. With updateType == Replace, dst is first zeroed on the DoFs selected by flag.
+/// Storages with global cells are not supported (WALBERLA_ABORT); every macro-face must carry an AnnulusMap.
+void P2ElementwiseEpsilonAnnulusMap_1_1::apply( const P2Function< real_t >& src,
+                                                const P2Function< real_t >& dst,
+                                                uint_t                      level,
+                                                DoFType                     flag,
+                                                UpdateType                  updateType ) const
+{
+   this->startTiming( "apply" );
+
+   // Make sure that halos are up-to-date
+   this->timingTree_->start( "pre-communication" );
+   if ( this->storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( mu, level, communication::syncDirection_t::LOW2HIGH );
+   }
+   this->timingTree_->stop( "pre-communication" );
+
+   if ( updateType == Replace )
+   {
+      // We need to zero the destination array (including halos).
+      // However, we must not zero out anything that is not flagged with the specified BCs.
+      // Therefore, we first zero out everything that is flagged, and then, later,
+      // the halos of the highest dim primitives.
+      dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data in the functions
+         real_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muVertex  = face.getData( mu.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muEdge    = face.getData( mu.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         // Zero out dst halos only, i.e. the vertex DoFs on the macro-face boundary and the non-inner edge DoFs.
+         // This is also necessary when using update type == Add.
+         // During additive comm we then skip zeroing the data on the lower-dim primitives.
+         for ( const auto& idx : vertexdof::macroface::Iterator( level ) )
+         {
+            if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) )
+            {
+               auto arrayIdx             = vertexdof::macroface::index( level, idx.x(), idx.y() );
+               _data_dstVertex[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+            }
+         }
+         for ( const auto& idx : edgedof::macroface::Iterator( level ) )
+         {
+            for ( const auto& orientation : edgedof::faceLocalEdgeDoFOrientations )
+            {
+               if ( !edgedof::macroface::isInnerEdgeDoF( level, idx, orientation ) )
+               {
+                  auto arrayIdx           = edgedof::macroface::index( level, idx.x(), idx.y(), orientation );
+                  _data_dstEdge[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+               }
+            }
+         }
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         // Resolve the AnnulusMap once per macro-face; all blending parameters below are read from this pointer.
+         const auto annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+         WALBERLA_CHECK_NOT_NULLPTR(
+             annulusMap, "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         const real_t radRefVertex = annulusMap->radRefVertex();
+         const real_t radRayVertex = annulusMap->radRayVertex();
+         const real_t refVertex_0  = annulusMap->refVertex()[0];
+         const real_t rayVertex_0  = annulusMap->rayVertex()[0];
+         const real_t thrVertex_0  = annulusMap->thrVertex()[0];
+         const real_t refVertex_1  = annulusMap->refVertex()[1];
+         const real_t rayVertex_1  = annulusMap->rayVertex()[1];
+         const real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+         apply_macro_2D(
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_muEdge,
+             _data_muVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+
+      // Push result to lower-dimensional primitives
+      this->timingTree_->start( "post-communication" );
+      // Note: We could avoid communication here by implementing the apply() also for the respective
+      //       lower dimensional primitives!
+      dst.getVertexDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getVertexDoFFunction().communicateAdditively< Face, Vertex >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getEdgeDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      this->timingTree_->stop( "post-communication" );
+   }
+
+   this->stopTiming( "apply" );
+}
+/// Runs the generated kernel toMatrix_macro_2D on every macro-face of the given level, handing it mat together with the
+/// idx_t data of src and dst and the coefficient mu. The flag is ignored (a warning is logged if it is not All).
+void P2ElementwiseEpsilonAnnulusMap_1_1::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                                                   const P2Function< idx_t >&                  src,
+                                                   const P2Function< idx_t >&                  dst,
+                                                   uint_t                                      level,
+                                                   DoFType                                     flag ) const
+{
+   this->startTiming( "toMatrix" );
+
+   // We currently ignore the flag provided!
+   if ( flag != All )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      this->timingTree_->start( "pre-communication" );
+      mu.communicate< Face, Cell >( level );
+      mu.communicate< Edge, Cell >( level );
+      mu.communicate< Vertex, Cell >( level );
+      this->timingTree_->stop( "pre-communication" );
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      this->timingTree_->start( "pre-communication" );
+      communication::syncFunctionBetweenPrimitives( mu, level, communication::syncDirection_t::LOW2HIGH );
+      this->timingTree_->stop( "pre-communication" );
+
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data
+         idx_t*  _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muVertex  = face.getData( mu.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muEdge    = face.getData( mu.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         // Resolve the AnnulusMap once per macro-face; all blending parameters below are read from this pointer.
+         const auto annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+         WALBERLA_CHECK_NOT_NULLPTR(
+             annulusMap, "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         const real_t radRefVertex = annulusMap->radRefVertex();
+         const real_t radRayVertex = annulusMap->radRayVertex();
+         const real_t refVertex_0  = annulusMap->refVertex()[0];
+         const real_t rayVertex_0  = annulusMap->rayVertex()[0];
+         const real_t thrVertex_0  = annulusMap->thrVertex()[0];
+         const real_t refVertex_1  = annulusMap->refVertex()[1];
+         const real_t rayVertex_1  = annulusMap->rayVertex()[1];
+         const real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+         toMatrix_macro_2D(
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_muEdge,
+             _data_muVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             mat,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+   }
+   this->stopTiming( "toMatrix" );
+}
+/// (Re)computes the inverse diagonal of the operator for all levels in [minLevel_, maxLevel_] and stores it in invDiag_
+/// (allocated on first use). Per level, the generated kernel computeInverseDiagonalOperatorValues_macro_2D is run on
+/// every macro-face, the contributions are communicated additively to the edges and vertices, and the result is
+/// inverted element-wise. Storages with global cells are not supported (WALBERLA_ABORT).
+void P2ElementwiseEpsilonAnnulusMap_1_1::computeInverseDiagonalOperatorValues()
+{
+   this->startTiming( "computeInverseDiagonalOperatorValues" );
+
+   if ( invDiag_ == nullptr )
+   {
+      invDiag_ = std::make_shared< P2Function< real_t > >( "inverse diagonal entries", storage_, minLevel_, maxLevel_ );
+   }
+
+   for ( uint_t level = minLevel_; level <= maxLevel_; level++ )
+   {
+      invDiag_->setToZero( level );
+
+      if ( storage_->hasGlobalCells() )
+      {
+         this->timingTree_->start( "pre-communication" );
+         mu.communicate< Face, Cell >( level );
+         mu.communicate< Edge, Cell >( level );
+         mu.communicate< Vertex, Cell >( level );
+         this->timingTree_->stop( "pre-communication" );
+         WALBERLA_ABORT( "Not implemented." );
+      }
+      else
+      {
+         this->timingTree_->start( "pre-communication" );
+         communication::syncFunctionBetweenPrimitives( mu, level, communication::syncDirection_t::LOW2HIGH );
+         this->timingTree_->stop( "pre-communication" );
+
+         for ( auto& it : storage_->getFaces() )
+         {
+            Face& face = *it.second;
+
+            // get hold of the actual numerical data
+            real_t* _data_invDiag_Vertex = face.getData( invDiag_->getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_invDiag_Edge   = face.getData( invDiag_->getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_muVertex       = face.getData( mu.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_muEdge         = face.getData( mu.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+            const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+            const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+            const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+            const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+            const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+            const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+            const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+            const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+            // Resolve the AnnulusMap once per macro-face; all blending parameters below are read from this pointer.
+            const auto annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+            WALBERLA_CHECK_NOT_NULLPTR(
+                annulusMap, "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+            const real_t radRefVertex = annulusMap->radRefVertex();
+            const real_t radRayVertex = annulusMap->radRayVertex();
+            const real_t refVertex_0  = annulusMap->refVertex()[0];
+            const real_t rayVertex_0  = annulusMap->rayVertex()[0];
+            const real_t thrVertex_0  = annulusMap->thrVertex()[0];
+            const real_t refVertex_1  = annulusMap->refVertex()[1];
+            const real_t rayVertex_1  = annulusMap->rayVertex()[1];
+            const real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+            this->timingTree_->start( "kernel" );
+            computeInverseDiagonalOperatorValues_macro_2D(
+                _data_invDiag_Edge,
+                _data_invDiag_Vertex,
+                _data_muEdge,
+                _data_muVertex,
+                macro_vertex_coord_id_0comp0,
+                macro_vertex_coord_id_0comp1,
+                macro_vertex_coord_id_1comp0,
+                macro_vertex_coord_id_1comp1,
+                macro_vertex_coord_id_2comp0,
+                macro_vertex_coord_id_2comp1,
+                micro_edges_per_macro_edge,
+                micro_edges_per_macro_edge_float,
+                radRayVertex,
+                radRefVertex,
+                rayVertex_0,
+                rayVertex_1,
+                refVertex_0,
+                refVertex_1,
+                thrVertex_0,
+                thrVertex_1 );
+            this->timingTree_->stop( "kernel" );
+         }
+
+         // Push result to lower-dimensional primitives
+         this->timingTree_->start( "post-communication" );
+         // Note: We could avoid communication here by implementing the apply() also for the respective
+         //       lower dimensional primitives!
+         invDiag_->getVertexDoFFunction().communicateAdditively< Face, Edge >( level );
+         invDiag_->getVertexDoFFunction().communicateAdditively< Face, Vertex >( level );
+         invDiag_->getEdgeDoFFunction().communicateAdditively< Face, Edge >( level );
+         this->timingTree_->stop( "post-communication" );
+      }
+
+      invDiag_->invertElementwise( level );
+   }
+
+   this->stopTiming( "computeInverseDiagonalOperatorValues" );
+}
+std::shared_ptr< P2Function< real_t > > P2ElementwiseEpsilonAnnulusMap_1_1::getInverseDiagonalValues() const
+{
+   return invDiag_; // shared handle, no copy of the data; still null if computeInverseDiagonalOperatorValues() never ran
+}
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.hpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..885ad0acab88804ab73e3b7d913613be47b7f74e
--- /dev/null
+++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.hpp
@@ -0,0 +1,182 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+#include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/communication/Syncing.hpp"
+#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
+#include "hyteg/geometry/AnnulusMap.hpp"
+#include "hyteg/operators/Operator.hpp"
+#include "hyteg/p2functionspace/P2Function.hpp"
+#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
+#include "hyteg/solvers/Smoothables.hpp"
+#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+/// "Epsilon" operator.
+///
+/// Component trial: 1
+/// Component test:  1
+/// Geometry map:    AnnulusMap
+///
+/// Weak formulation
+///
+///     u: trial function (vectorial space: Lagrange, degree: 2)
+///     v: test function  (vectorial space: Lagrange, degree: 2)
+///     μ: coefficient    (scalar space:    Lagrange, degree: 2)
+///
+///     ∫ 2 μ ε(u) : ε(v)
+///
+/// where
+///
+///     ε(w) := (1/2) (∇w + (∇w)ᵀ)
+
+class P2ElementwiseEpsilonAnnulusMap_1_1 : public Operator< P2Function< real_t >, P2Function< real_t > >,
+                                           public OperatorWithInverseDiagonal< P2Function< real_t > >
+{
+ public:
+   P2ElementwiseEpsilonAnnulusMap_1_1( const std::shared_ptr< PrimitiveStorage >& storage,  // forwarded to the Operator base
+                                       size_t                                     minLevel, // forwarded to the Operator base
+                                       size_t                                     maxLevel, // forwarded to the Operator base
+                                       const P2Function< real_t >&                _mu );    // coefficient μ, copied into mu
+
+   void apply( const P2Function< real_t >& src,   // input function; synchronised to the macro-faces before the kernel runs
+               const P2Function< real_t >& dst,   // output function; its face data is written by the kernel
+               uint_t                      level, // refinement level to operate on
+               DoFType                     flag,  // DoFs zeroed for Replace; All ^ flag is passed to the additive communication
+               UpdateType                  updateType = Replace ) const; // Replace: dst is zeroed on flagged DoFs first
+
+   void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,   // handed to the toMatrix kernel for every macro-face
+                  const P2Function< idx_t >&                  src,   // idx_t data passed to the kernel
+                  const P2Function< idx_t >&                  dst,   // idx_t data passed to the kernel
+                  uint_t                                      level, // refinement level to operate on
+                  DoFType                                     flag ) const; // ignored; a warning is logged if it is not All
+
+   void computeInverseDiagonalOperatorValues(); // fills invDiag_ for all levels from minLevel_ to maxLevel_
+
+   std::shared_ptr< P2Function< real_t > > getInverseDiagonalValues() const; // returns invDiag_ as is
+
+ protected:
+ private:
+   /// Kernel type: apply
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    660     924      24      16      4              0                 0              1
+   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
+                        real_t* RESTRICT _data_dstVertex,
+                        real_t* RESTRICT _data_muEdge,
+                        real_t* RESTRICT _data_muVertex,
+                        real_t* RESTRICT _data_srcEdge,
+                        real_t* RESTRICT _data_srcVertex,
+                        real_t           macro_vertex_coord_id_0comp0, // face.getCoordinates()[0][0]; same scheme below
+                        real_t           macro_vertex_coord_id_0comp1,
+                        real_t           macro_vertex_coord_id_1comp0,
+                        real_t           macro_vertex_coord_id_1comp1,
+                        real_t           macro_vertex_coord_id_2comp0,
+                        real_t           macro_vertex_coord_id_2comp1,
+                        int64_t          micro_edges_per_macro_edge, // levelinfo::num_microedges_per_edge( level )
+                        real_t           micro_edges_per_macro_edge_float, // same value converted to real_t
+                        real_t           radRayVertex, // this and the remaining parameters are read from the face's AnnulusMap
+                        real_t           radRefVertex,
+                        real_t           rayVertex_0,
+                        real_t           rayVertex_1,
+                        real_t           refVertex_0,
+                        real_t           refVertex_1,
+                        real_t           thrVertex_0,
+                        real_t           thrVertex_1 ) const;
+   /// Kernel type: toMatrix
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    624     888      24      16      4              0                 0              4
+   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                           idx_t* RESTRICT                      _data_dstVertex,
+                           real_t* RESTRICT                     _data_muEdge,
+                           real_t* RESTRICT                     _data_muVertex,
+                           idx_t* RESTRICT                      _data_srcEdge,
+                           idx_t* RESTRICT                      _data_srcVertex,
+                           real_t                               macro_vertex_coord_id_0comp0,
+                           real_t                               macro_vertex_coord_id_0comp1,
+                           real_t                               macro_vertex_coord_id_1comp0,
+                           real_t                               macro_vertex_coord_id_1comp1,
+                           real_t                               macro_vertex_coord_id_2comp0,
+                           real_t                               macro_vertex_coord_id_2comp1,
+                           std::shared_ptr< SparseMatrixProxy > mat, // the proxy given to toMatrix()
+                           int64_t                              micro_edges_per_macro_edge, // levelinfo::num_microedges_per_edge( level )
+                           real_t                               micro_edges_per_macro_edge_float,
+                           real_t                               radRayVertex,
+                           real_t                               radRefVertex,
+                           real_t                               rayVertex_0,
+                           real_t                               rayVertex_1,
+                           real_t                               refVertex_0,
+                           real_t                               refVertex_1,
+                           real_t                               thrVertex_0,
+                           real_t                               thrVertex_1 ) const;
+   /// Kernel type: computeInverseDiagonalOperatorValues
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    450     708      24      16      4              0                 0              1
+   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
+                                                       real_t* RESTRICT _data_invDiag_Vertex,
+                                                       real_t* RESTRICT _data_muEdge,
+                                                       real_t* RESTRICT _data_muVertex,
+                                                       real_t           macro_vertex_coord_id_0comp0,
+                                                       real_t           macro_vertex_coord_id_0comp1,
+                                                       real_t           macro_vertex_coord_id_1comp0,
+                                                       real_t           macro_vertex_coord_id_1comp1,
+                                                       real_t           macro_vertex_coord_id_2comp0,
+                                                       real_t           macro_vertex_coord_id_2comp1,
+                                                       int64_t          micro_edges_per_macro_edge, // levelinfo::num_microedges_per_edge( level )
+                                                       real_t           micro_edges_per_macro_edge_float,
+                                                       real_t           radRayVertex,
+                                                       real_t           radRefVertex,
+                                                       real_t           rayVertex_0,
+                                                       real_t           rayVertex_1,
+                                                       real_t           refVertex_0,
+                                                       real_t           refVertex_1,
+                                                       real_t           thrVertex_0,
+                                                       real_t           thrVertex_1 ) const;
+
+   std::shared_ptr< P2Function< real_t > > invDiag_; // created on demand by computeInverseDiagonalOperatorValues()
+   P2Function< real_t >                    mu;       // coefficient μ; copy of the constructor argument _mu
+};
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..55b8c2e65a2e606458f637b8aa4f2cf72840fc47
--- /dev/null
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp
@@ -0,0 +1,899 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseEpsilonAnnulusMap_0_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseEpsilonAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (tmp_qloop_1*tmp_qloop_14 - tmp_qloop_11*tmp_qloop_13);
+       const real_t tmp_qloop_51 = tmp_qloop_15*1.0 / (-tmp_qloop_0*tmp_qloop_14 + tmp_qloop_11*tmp_qloop_12);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_5 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,tmp_qloop_7);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_4);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(tmp_qloop_8,tmp_qloop_9);
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_10)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16));
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_4);
+                   const __m256d tmp_qloop_19 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_10),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_10));
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_7),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_4),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1))),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_7);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(tmp_qloop_23,tmp_qloop_9));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_24),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_26)));
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_28);
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q])),_mm256_mul_pd(_mm256_mul_pd(mu_dof_1,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_2,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_3,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_4,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_5,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5])));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q]))));
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_28);
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_28);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q]))));
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_37);
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_37);
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_37);
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_37);
+                   const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q])),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q])));
+                   const __m256d tmp_qloop_43 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q])),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q])));
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_44,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q]))),tmp_qloop_43));
+                   const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_46);
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_48);
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(tmp_qloop_47,tmp_qloop_49);
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_50)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_51,tmp_qloop_51,tmp_qloop_51,tmp_qloop_51));
+                   const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_52);
+                   const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_48),_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_46),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)));
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_50),_mm256_mul_pd(tmp_qloop_50,tmp_qloop_50)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_55,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(tmp_qloop_51,tmp_qloop_51,tmp_qloop_51,tmp_qloop_51)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_52);
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_55,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(tmp_qloop_51,tmp_qloop_51,tmp_qloop_51,tmp_qloop_51)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_48);
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q]),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_58,tmp_qloop_59),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_56,tmp_qloop_59))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_58)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_49,tmp_qloop_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d tmp_qloop_61 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1])));
+                   const __m256d tmp_qloop_62 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_63 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2])));
+                   const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_65 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3])));
+                   const __m256d tmp_qloop_66 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4])));
+                   const __m256d tmp_qloop_68 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5])));
+                   const __m256d tmp_qloop_70 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1]))));
+                   const __m256d tmp_qloop_72 = _mm256_mul_pd(tmp_qloop_44,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1]))),tmp_qloop_43));
+                   const __m256d tmp_qloop_73 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2]))));
+                   const __m256d tmp_qloop_74 = _mm256_mul_pd(tmp_qloop_44,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2]))),tmp_qloop_43));
+                   const __m256d tmp_qloop_75 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3]))));
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_44,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3]))),tmp_qloop_43));
+                   const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4]))));
+                   const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_44,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4]))),tmp_qloop_43));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q])))),_mm256_mul_pd(tmp_qloop_45,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q]))),tmp_qloop_42))),tmp_qloop_36));
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,tmp_qloop_61),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_62)),tmp_qloop_36));
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,tmp_qloop_63),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_64)),tmp_qloop_36));
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,tmp_qloop_65),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_66)),tmp_qloop_36));
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,tmp_qloop_67),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_68)),tmp_qloop_36));
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,tmp_qloop_69),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_70)),tmp_qloop_36));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,tmp_qloop_71),_mm256_mul_pd(tmp_qloop_62,tmp_qloop_72)),tmp_qloop_36));
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,tmp_qloop_71),_mm256_mul_pd(tmp_qloop_64,tmp_qloop_72)),tmp_qloop_36));
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_71),_mm256_mul_pd(tmp_qloop_66,tmp_qloop_72)),tmp_qloop_36));
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_71),_mm256_mul_pd(tmp_qloop_68,tmp_qloop_72)),tmp_qloop_36));
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_71),_mm256_mul_pd(tmp_qloop_70,tmp_qloop_72)),tmp_qloop_36));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,tmp_qloop_73),_mm256_mul_pd(tmp_qloop_64,tmp_qloop_74)),tmp_qloop_36));
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_73),_mm256_mul_pd(tmp_qloop_66,tmp_qloop_74)),tmp_qloop_36));
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_73),_mm256_mul_pd(tmp_qloop_68,tmp_qloop_74)),tmp_qloop_36));
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_73),_mm256_mul_pd(tmp_qloop_70,tmp_qloop_74)),tmp_qloop_36));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_75),_mm256_mul_pd(tmp_qloop_66,tmp_qloop_76)),tmp_qloop_36));
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_75),_mm256_mul_pd(tmp_qloop_68,tmp_qloop_76)),tmp_qloop_36));
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_75),_mm256_mul_pd(tmp_qloop_70,tmp_qloop_76)),tmp_qloop_36));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_77),_mm256_mul_pd(tmp_qloop_68,tmp_qloop_78)),tmp_qloop_36));
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_77),_mm256_mul_pd(tmp_qloop_70,tmp_qloop_78)),tmp_qloop_36));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_32,tmp_qloop_69),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_44,tmp_qloop_70),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5]))),tmp_qloop_43))),tmp_qloop_36));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_1,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_2,src_dof_0),_mm256_mul_pd(q_acc_1_2,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_3,src_dof_0),_mm256_mul_pd(q_acc_1_3,src_dof_1)),_mm256_mul_pd(q_acc_2_3,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_4,src_dof_0),_mm256_mul_pd(q_acc_1_4,src_dof_1)),_mm256_mul_pd(q_acc_2_4,src_dof_2)),_mm256_mul_pd(q_acc_3_4,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_5,src_dof_0),_mm256_mul_pd(q_acc_1_5,src_dof_1)),_mm256_mul_pd(q_acc_2_5,src_dof_2)),_mm256_mul_pd(q_acc_3_5,src_dof_3)),_mm256_mul_pd(q_acc_4_5,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                   const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                   const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                   const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                   const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                   const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                   const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                   const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                   const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                   const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                   const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                   const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                   const real_t tmp_qloop_28 = tmp_qloop_27*1.0;
+                   const real_t tmp_qloop_29 = tmp_qloop_21*tmp_qloop_28;
+                   const real_t tmp_qloop_30 = -tmp_qloop_26;
+                   const real_t tmp_qloop_31 = tmp_qloop_28*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = mu_dof_0*2.0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*2.0*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_GRAY[6*q + 5];
+                   const real_t tmp_qloop_33 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q]);
+                   const real_t tmp_qloop_34 = tmp_qloop_24*tmp_qloop_28;
+                   const real_t tmp_qloop_35 = tmp_qloop_25*tmp_qloop_28;
+                   const real_t tmp_qloop_36 = tmp_qloop_32*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q]);
+                   const real_t tmp_qloop_37 = tmp_qloop_27*0.5;
+                   const real_t tmp_qloop_38 = tmp_qloop_24*tmp_qloop_37;
+                   const real_t tmp_qloop_39 = tmp_qloop_25*tmp_qloop_37;
+                   const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_37;
+                   const real_t tmp_qloop_41 = tmp_qloop_30*tmp_qloop_37;
+                   const real_t tmp_qloop_42 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q];
+                   const real_t tmp_qloop_43 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q];
+                   const real_t tmp_qloop_44 = tmp_qloop_32*2.0;
+                   const real_t tmp_qloop_45 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_43);
+                   const real_t tmp_qloop_46 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                   const real_t tmp_qloop_48 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                   const real_t tmp_qloop_50 = tmp_qloop_47 + tmp_qloop_49;
+                   const real_t tmp_qloop_52 = pow(tmp_qloop_50, -0.50000000000000000)*tmp_qloop_51*1.0;
+                   const real_t tmp_qloop_53 = tmp_qloop_46*tmp_qloop_52;
+                   const real_t tmp_qloop_54 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_48) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_46);
+                   const real_t tmp_qloop_55 = pow(tmp_qloop_50, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_56 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                   const real_t tmp_qloop_57 = tmp_qloop_48*tmp_qloop_52;
+                   const real_t tmp_qloop_58 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                   const real_t tmp_qloop_59 = tmp_qloop_46*tmp_qloop_48;
+                   const real_t tmp_qloop_60 = abs_det_jac_affine_GRAY*abs((tmp_qloop_0*tmp_qloop_53 - tmp_qloop_58*tmp_qloop_59)*(tmp_qloop_12*tmp_qloop_57 + tmp_qloop_56*tmp_qloop_59) - (tmp_qloop_0*tmp_qloop_57 + tmp_qloop_47*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_53 - tmp_qloop_49*tmp_qloop_56))*_data_q_w[q];
+                   const real_t tmp_qloop_61 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1];
+                   const real_t tmp_qloop_62 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_42;
+                   const real_t tmp_qloop_63 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2];
+                   const real_t tmp_qloop_64 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_42;
+                   const real_t tmp_qloop_65 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3];
+                   const real_t tmp_qloop_66 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_42;
+                   const real_t tmp_qloop_67 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4];
+                   const real_t tmp_qloop_68 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_42;
+                   const real_t tmp_qloop_69 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5];
+                   const real_t tmp_qloop_70 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_42;
+                   const real_t tmp_qloop_71 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1]);
+                   const real_t tmp_qloop_72 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_43);
+                   const real_t tmp_qloop_73 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2]);
+                   const real_t tmp_qloop_74 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_43);
+                   const real_t tmp_qloop_75 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3]);
+                   const real_t tmp_qloop_76 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_43);
+                   const real_t tmp_qloop_77 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4]);
+                   const real_t tmp_qloop_78 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_43);
+                   const real_t q_tmp_0_0 = tmp_qloop_60*(tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q]) + tmp_qloop_36 + tmp_qloop_45*(tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_42));
+                   const real_t q_tmp_0_1 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_61 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_62);
+                   const real_t q_tmp_0_2 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_63 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_64);
+                   const real_t q_tmp_0_3 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_65 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_66);
+                   const real_t q_tmp_0_4 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_67 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_68);
+                   const real_t q_tmp_0_5 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_69 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_70);
+                   const real_t q_tmp_1_1 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_61*tmp_qloop_71 + tmp_qloop_62*tmp_qloop_72);
+                   const real_t q_tmp_1_2 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_63*tmp_qloop_71 + tmp_qloop_64*tmp_qloop_72);
+                   const real_t q_tmp_1_3 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_65*tmp_qloop_71 + tmp_qloop_66*tmp_qloop_72);
+                   const real_t q_tmp_1_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_71 + tmp_qloop_68*tmp_qloop_72);
+                   const real_t q_tmp_1_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_71 + tmp_qloop_70*tmp_qloop_72);
+                   const real_t q_tmp_2_2 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_63*tmp_qloop_73 + tmp_qloop_64*tmp_qloop_74);
+                   const real_t q_tmp_2_3 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_65*tmp_qloop_73 + tmp_qloop_66*tmp_qloop_74);
+                   const real_t q_tmp_2_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_73 + tmp_qloop_68*tmp_qloop_74);
+                   const real_t q_tmp_2_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_73 + tmp_qloop_70*tmp_qloop_74);
+                   const real_t q_tmp_3_3 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_66*tmp_qloop_76);
+                   const real_t q_tmp_3_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_68*tmp_qloop_76);
+                   const real_t q_tmp_3_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_70*tmp_qloop_76);
+                   const real_t q_tmp_4_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_68*tmp_qloop_78);
+                   const real_t q_tmp_4_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_70*tmp_qloop_78);
+                   const real_t q_tmp_5_5 = tmp_qloop_60*(tmp_qloop_32*tmp_qloop_69*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5]) + tmp_qloop_36 + tmp_qloop_44*tmp_qloop_70*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_43));
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+                const real_t elMatVec_1 = q_acc_0_1*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_0_2*src_dof_0 + q_acc_1_2*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_0_3*src_dof_0 + q_acc_1_3*src_dof_1 + q_acc_2_3*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_0_4*src_dof_0 + q_acc_1_4*src_dof_1 + q_acc_2_4*src_dof_2 + q_acc_3_4*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_0_5*src_dof_0 + q_acc_1_5*src_dof_1 + q_acc_2_5*src_dof_2 + q_acc_3_5*src_dof_3 + q_acc_4_5*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_5 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,tmp_qloop_7);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_4);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(tmp_qloop_8,tmp_qloop_9);
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_10)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16));
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_4);
+                   const __m256d tmp_qloop_19 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_10),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_10));
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_7),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_4),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1))),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_7);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(tmp_qloop_23,tmp_qloop_9));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_24),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_26)));
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_28);
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q])),_mm256_mul_pd(_mm256_mul_pd(mu_dof_1,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_2,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_3,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_4,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_5,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5])));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q]))));
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_28);
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_28);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q]))));
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_37);
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_37);
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_37);
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_37);
+                   const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q])),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q])));
+                   const __m256d tmp_qloop_43 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q])),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q])));
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_44,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q]))),tmp_qloop_43));
+                   const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_46);
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_48);
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(tmp_qloop_47,tmp_qloop_49);
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_50)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_51,tmp_qloop_51,tmp_qloop_51,tmp_qloop_51));
+                   const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_52);
+                   const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_48),_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_46),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)));
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_50),_mm256_mul_pd(tmp_qloop_50,tmp_qloop_50)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_55,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(tmp_qloop_51,tmp_qloop_51,tmp_qloop_51,tmp_qloop_51)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_52);
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_55,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(tmp_qloop_51,tmp_qloop_51,tmp_qloop_51,tmp_qloop_51)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_48);
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q]),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_58,tmp_qloop_59),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_56,tmp_qloop_59))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_58)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_49,tmp_qloop_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d tmp_qloop_61 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1])));
+                   const __m256d tmp_qloop_62 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_63 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2])));
+                   const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_65 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3])));
+                   const __m256d tmp_qloop_66 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4])));
+                   const __m256d tmp_qloop_68 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5])));
+                   const __m256d tmp_qloop_70 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1]))));
+                   const __m256d tmp_qloop_72 = _mm256_mul_pd(tmp_qloop_44,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1]))),tmp_qloop_43));
+                   const __m256d tmp_qloop_73 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2]))));
+                   const __m256d tmp_qloop_74 = _mm256_mul_pd(tmp_qloop_44,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2]))),tmp_qloop_43));
+                   const __m256d tmp_qloop_75 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3]))));
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_44,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3]))),tmp_qloop_43));
+                   const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4]))));
+                   const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_44,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4]))),tmp_qloop_43));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q])))),_mm256_mul_pd(tmp_qloop_45,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q]))),tmp_qloop_42))),tmp_qloop_36));
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,tmp_qloop_61),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_62)),tmp_qloop_36));
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,tmp_qloop_63),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_64)),tmp_qloop_36));
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,tmp_qloop_65),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_66)),tmp_qloop_36));
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,tmp_qloop_67),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_68)),tmp_qloop_36));
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,tmp_qloop_69),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_70)),tmp_qloop_36));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,tmp_qloop_71),_mm256_mul_pd(tmp_qloop_62,tmp_qloop_72)),tmp_qloop_36));
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,tmp_qloop_71),_mm256_mul_pd(tmp_qloop_64,tmp_qloop_72)),tmp_qloop_36));
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_71),_mm256_mul_pd(tmp_qloop_66,tmp_qloop_72)),tmp_qloop_36));
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_71),_mm256_mul_pd(tmp_qloop_68,tmp_qloop_72)),tmp_qloop_36));
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_71),_mm256_mul_pd(tmp_qloop_70,tmp_qloop_72)),tmp_qloop_36));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,tmp_qloop_73),_mm256_mul_pd(tmp_qloop_64,tmp_qloop_74)),tmp_qloop_36));
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_73),_mm256_mul_pd(tmp_qloop_66,tmp_qloop_74)),tmp_qloop_36));
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_73),_mm256_mul_pd(tmp_qloop_68,tmp_qloop_74)),tmp_qloop_36));
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_73),_mm256_mul_pd(tmp_qloop_70,tmp_qloop_74)),tmp_qloop_36));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_75),_mm256_mul_pd(tmp_qloop_66,tmp_qloop_76)),tmp_qloop_36));
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_75),_mm256_mul_pd(tmp_qloop_68,tmp_qloop_76)),tmp_qloop_36));
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_75),_mm256_mul_pd(tmp_qloop_70,tmp_qloop_76)),tmp_qloop_36));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_77),_mm256_mul_pd(tmp_qloop_68,tmp_qloop_78)),tmp_qloop_36));
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_77),_mm256_mul_pd(tmp_qloop_70,tmp_qloop_78)),tmp_qloop_36));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_32,tmp_qloop_69),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_44,tmp_qloop_70),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5]))),tmp_qloop_43))),tmp_qloop_36));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_1,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_2,src_dof_0),_mm256_mul_pd(q_acc_1_2,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_3,src_dof_0),_mm256_mul_pd(q_acc_1_3,src_dof_1)),_mm256_mul_pd(q_acc_2_3,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_4,src_dof_0),_mm256_mul_pd(q_acc_1_4,src_dof_1)),_mm256_mul_pd(q_acc_2_4,src_dof_2)),_mm256_mul_pd(q_acc_3_4,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_5,src_dof_0),_mm256_mul_pd(q_acc_1_5,src_dof_1)),_mm256_mul_pd(q_acc_2_5,src_dof_2)),_mm256_mul_pd(q_acc_3_5,src_dof_3)),_mm256_mul_pd(q_acc_4_5,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                   const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                   const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                   const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                   const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                   const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                   const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                   const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                   const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                   const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                   const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                   const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                   const real_t tmp_qloop_28 = tmp_qloop_27*1.0;
+                   const real_t tmp_qloop_29 = tmp_qloop_21*tmp_qloop_28;
+                   const real_t tmp_qloop_30 = -tmp_qloop_26;
+                   const real_t tmp_qloop_31 = tmp_qloop_28*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = mu_dof_0*2.0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*2.0*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_BLUE[6*q + 5];
+                   const real_t tmp_qloop_33 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q]);
+                   const real_t tmp_qloop_34 = tmp_qloop_24*tmp_qloop_28;
+                   const real_t tmp_qloop_35 = tmp_qloop_25*tmp_qloop_28;
+                   const real_t tmp_qloop_36 = tmp_qloop_32*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q]);
+                   const real_t tmp_qloop_37 = tmp_qloop_27*0.5;
+                   const real_t tmp_qloop_38 = tmp_qloop_24*tmp_qloop_37;
+                   const real_t tmp_qloop_39 = tmp_qloop_25*tmp_qloop_37;
+                   const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_37;
+                   const real_t tmp_qloop_41 = tmp_qloop_30*tmp_qloop_37;
+                   const real_t tmp_qloop_42 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q];
+                   const real_t tmp_qloop_43 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q];
+                   const real_t tmp_qloop_44 = tmp_qloop_32*2.0;
+                   const real_t tmp_qloop_45 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_43);
+                   const real_t tmp_qloop_46 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                   const real_t tmp_qloop_48 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                   const real_t tmp_qloop_50 = tmp_qloop_47 + tmp_qloop_49;
+                   const real_t tmp_qloop_52 = pow(tmp_qloop_50, -0.50000000000000000)*tmp_qloop_51*1.0;
+                   const real_t tmp_qloop_53 = tmp_qloop_46*tmp_qloop_52;
+                   const real_t tmp_qloop_54 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_48) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_46);
+                   const real_t tmp_qloop_55 = pow(tmp_qloop_50, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_56 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                   const real_t tmp_qloop_57 = tmp_qloop_48*tmp_qloop_52;
+                   const real_t tmp_qloop_58 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                   const real_t tmp_qloop_59 = tmp_qloop_46*tmp_qloop_48;
+                   const real_t tmp_qloop_60 = abs_det_jac_affine_BLUE*abs((tmp_qloop_0*tmp_qloop_53 - tmp_qloop_58*tmp_qloop_59)*(tmp_qloop_12*tmp_qloop_57 + tmp_qloop_56*tmp_qloop_59) - (tmp_qloop_0*tmp_qloop_57 + tmp_qloop_47*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_53 - tmp_qloop_49*tmp_qloop_56))*_data_q_w[q];
+                   const real_t tmp_qloop_61 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1];
+                   const real_t tmp_qloop_62 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_42;
+                   const real_t tmp_qloop_63 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2];
+                   const real_t tmp_qloop_64 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_42;
+                   const real_t tmp_qloop_65 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3];
+                   const real_t tmp_qloop_66 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_42;
+                   const real_t tmp_qloop_67 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4];
+                   const real_t tmp_qloop_68 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_42;
+                   const real_t tmp_qloop_69 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5];
+                   const real_t tmp_qloop_70 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_42;
+                   const real_t tmp_qloop_71 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1]);
+                   const real_t tmp_qloop_72 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_43);
+                   const real_t tmp_qloop_73 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2]);
+                   const real_t tmp_qloop_74 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_43);
+                   const real_t tmp_qloop_75 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3]);
+                   const real_t tmp_qloop_76 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_43);
+                   const real_t tmp_qloop_77 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4]);
+                   const real_t tmp_qloop_78 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_43);
+                   const real_t q_tmp_0_0 = tmp_qloop_60*(tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q]) + tmp_qloop_36 + tmp_qloop_45*(tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_42));
+                   const real_t q_tmp_0_1 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_61 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_62);
+                   const real_t q_tmp_0_2 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_63 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_64);
+                   const real_t q_tmp_0_3 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_65 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_66);
+                   const real_t q_tmp_0_4 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_67 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_68);
+                   const real_t q_tmp_0_5 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_69 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_70);
+                   const real_t q_tmp_1_1 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_61*tmp_qloop_71 + tmp_qloop_62*tmp_qloop_72);
+                   const real_t q_tmp_1_2 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_63*tmp_qloop_71 + tmp_qloop_64*tmp_qloop_72);
+                   const real_t q_tmp_1_3 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_65*tmp_qloop_71 + tmp_qloop_66*tmp_qloop_72);
+                   const real_t q_tmp_1_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_71 + tmp_qloop_68*tmp_qloop_72);
+                   const real_t q_tmp_1_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_71 + tmp_qloop_70*tmp_qloop_72);
+                   const real_t q_tmp_2_2 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_63*tmp_qloop_73 + tmp_qloop_64*tmp_qloop_74);
+                   const real_t q_tmp_2_3 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_65*tmp_qloop_73 + tmp_qloop_66*tmp_qloop_74);
+                   const real_t q_tmp_2_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_73 + tmp_qloop_68*tmp_qloop_74);
+                   const real_t q_tmp_2_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_73 + tmp_qloop_70*tmp_qloop_74);
+                   const real_t q_tmp_3_3 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_66*tmp_qloop_76);
+                   const real_t q_tmp_3_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_68*tmp_qloop_76);
+                   const real_t q_tmp_3_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_70*tmp_qloop_76);
+                   const real_t q_tmp_4_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_68*tmp_qloop_78);
+                   const real_t q_tmp_4_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_70*tmp_qloop_78);
+                   const real_t q_tmp_5_5 = tmp_qloop_60*(tmp_qloop_32*tmp_qloop_69*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5]) + tmp_qloop_36 + tmp_qloop_44*tmp_qloop_70*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_43));
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+                const real_t elMatVec_1 = q_acc_0_1*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_0_2*src_dof_0 + q_acc_1_2*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_0_3*src_dof_0 + q_acc_1_3*src_dof_1 + q_acc_2_3*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_0_4*src_dof_0 + q_acc_1_4*src_dof_1 + q_acc_2_4*src_dof_2 + q_acc_3_4*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_0_5*src_dof_0 + q_acc_1_5*src_dof_1 + q_acc_2_5*src_dof_2 + q_acc_3_5*src_dof_3 + q_acc_4_5*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8e8141e4b44198f536ba484e1586d4610a3a238e
--- /dev/null
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
@@ -0,0 +1,615 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* This entire file was generated by the HyTeG form generator.
+*
+* Avoid modifying this file by hand. If it contains a bug, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseEpsilonAnnulusMap_0_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseEpsilonAnnulusMap_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_2 = -tmp_qloop_1;
+       const real_t tmp_qloop_12 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_13 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_14 = -tmp_qloop_13;
+       const real_t tmp_qloop_15 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_16 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_17 = -tmp_qloop_16*1.0 / (-tmp_qloop_12*tmp_qloop_14 + tmp_qloop_15*tmp_qloop_2);
+       const real_t tmp_qloop_49 = tmp_qloop_16*1.0 / (-tmp_qloop_1*tmp_qloop_15 + tmp_qloop_12*tmp_qloop_13);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q])),_mm256_mul_pd(_mm256_mul_pd(mu_dof_1,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_2,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_3,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_4,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_5,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5])));
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_8 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_5,tmp_qloop_5);
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9);
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_11)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_5);
+                   const __m256d tmp_qloop_20 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_11),_mm256_mul_pd(tmp_qloop_11,tmp_qloop_11));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_8),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_5),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_21),tmp_qloop_9),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_8);
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_21),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_24));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_21),tmp_qloop_5),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,tmp_qloop_5),tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_28 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_25),_mm256_mul_pd(tmp_qloop_26,tmp_qloop_27)));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_29);
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_31);
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_29);
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_29);
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q])),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q])),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q]))));
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_36);
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_36);
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_36);
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36);
+                   const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q])));
+                   const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q])));
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_44 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_44,tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_46);
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(tmp_qloop_45,tmp_qloop_47);
+                   const __m256d tmp_qloop_50 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_48)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_49,tmp_qloop_49,tmp_qloop_49,tmp_qloop_49));
+                   const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_44,tmp_qloop_50);
+                   const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_46),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_44),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)));
+                   const __m256d tmp_qloop_53 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_48),_mm256_mul_pd(tmp_qloop_48,tmp_qloop_48)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_set_pd(tmp_qloop_49,tmp_qloop_49,tmp_qloop_49,tmp_qloop_49)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_50);
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_set_pd(tmp_qloop_49,tmp_qloop_49,tmp_qloop_49,tmp_qloop_49)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_44,tmp_qloop_46);
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q]),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_51,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_56,tmp_qloop_57),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_57))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_56)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_51,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_47,tmp_qloop_54),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GR
AY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q]))),tmp_qloop_42))),tmp_qloop_35));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 
1])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1]))),tmp_qloop_42))),tmp_qloop_35));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 
2])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2]))),tmp_qloop_42))),tmp_qloop_35));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 
3])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3]))),tmp_qloop_42))),tmp_qloop_35));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 
4])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4]))),tmp_qloop_42))),tmp_qloop_35));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 
5])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5]))),tmp_qloop_42))),tmp_qloop_35));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatDiag_0 = q_acc_0_0;
+                const __m256d elMatDiag_1 = q_acc_1_1;
+                const __m256d elMatDiag_2 = q_acc_2_2;
+                const __m256d elMatDiag_3 = q_acc_3_3;
+                const __m256d elMatDiag_4 = q_acc_4_4;
+                const __m256d elMatDiag_5 = q_acc_5_5;
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_4 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_qloop_7 = p_affine_0_0 - p_affine_2_0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_0 = mu_dof_0*2.0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*2.0*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_GRAY[6*q + 5];
+                   const real_t tmp_qloop_5 = p_affine_0_1 - tmp_qloop_3*_data_q_p_0[q] - tmp_qloop_4*_data_q_p_1[q];
+                   const real_t tmp_qloop_8 = p_affine_0_0 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                   const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                   const real_t tmp_qloop_10 = (tmp_qloop_5*tmp_qloop_5);
+                   const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9;
+                   const real_t tmp_qloop_18 = pow(tmp_qloop_11, -0.50000000000000000)*tmp_qloop_17*1.0;
+                   const real_t tmp_qloop_19 = tmp_qloop_18*tmp_qloop_5;
+                   const real_t tmp_qloop_20 = pow(tmp_qloop_11, -1.5000000000000000);
+                   const real_t tmp_qloop_21 = radRayVertex + tmp_qloop_17*(tmp_qloop_14*(-rayVertex_0 + tmp_qloop_8) - tmp_qloop_2*(-rayVertex_1 + tmp_qloop_5));
+                   const real_t tmp_qloop_22 = -tmp_qloop_19*tmp_qloop_2 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_9*1.0;
+                   const real_t tmp_qloop_23 = tmp_qloop_18*tmp_qloop_8;
+                   const real_t tmp_qloop_24 = tmp_qloop_20*tmp_qloop_21*1.0;
+                   const real_t tmp_qloop_25 = tmp_qloop_10*tmp_qloop_24 + tmp_qloop_14*tmp_qloop_23;
+                   const real_t tmp_qloop_26 = tmp_qloop_2*tmp_qloop_23 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_5*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_27 = tmp_qloop_14*tmp_qloop_19 - tmp_qloop_24*tmp_qloop_5*tmp_qloop_8;
+                   const real_t tmp_qloop_28 = 1.0 / (tmp_qloop_22*tmp_qloop_25 + tmp_qloop_26*tmp_qloop_27);
+                   const real_t tmp_qloop_29 = tmp_qloop_28*1.0;
+                   const real_t tmp_qloop_30 = tmp_qloop_22*tmp_qloop_29;
+                   const real_t tmp_qloop_31 = -tmp_qloop_27;
+                   const real_t tmp_qloop_32 = tmp_qloop_29*tmp_qloop_31;
+                   const real_t tmp_qloop_33 = tmp_qloop_25*tmp_qloop_29;
+                   const real_t tmp_qloop_34 = tmp_qloop_26*tmp_qloop_29;
+                   const real_t tmp_qloop_35 = tmp_qloop_0*(tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q] + tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q])*(tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q] + tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q]);
+                   const real_t tmp_qloop_36 = tmp_qloop_28*0.5;
+                   const real_t tmp_qloop_37 = tmp_qloop_25*tmp_qloop_36;
+                   const real_t tmp_qloop_38 = tmp_qloop_26*tmp_qloop_36;
+                   const real_t tmp_qloop_39 = tmp_qloop_22*tmp_qloop_36;
+                   const real_t tmp_qloop_40 = tmp_qloop_31*tmp_qloop_36;
+                   const real_t tmp_qloop_41 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q] + tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q];
+                   const real_t tmp_qloop_42 = tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q];
+                   const real_t tmp_qloop_43 = tmp_qloop_0*2.0;
+                   const real_t tmp_qloop_44 = -p_affine_0_0 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                   const real_t tmp_qloop_45 = (tmp_qloop_44*tmp_qloop_44);
+                   const real_t tmp_qloop_46 = -p_affine_0_1 + tmp_qloop_3*_data_q_p_0[q] + tmp_qloop_4*_data_q_p_1[q];
+                   const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                   const real_t tmp_qloop_48 = tmp_qloop_45 + tmp_qloop_47;
+                   const real_t tmp_qloop_50 = pow(tmp_qloop_48, -0.50000000000000000)*tmp_qloop_49*1.0;
+                   const real_t tmp_qloop_51 = tmp_qloop_44*tmp_qloop_50;
+                   const real_t tmp_qloop_52 = tmp_qloop_1*(rayVertex_1 + tmp_qloop_46) - tmp_qloop_13*(rayVertex_0 + tmp_qloop_44);
+                   const real_t tmp_qloop_53 = pow(tmp_qloop_48, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_54 = tmp_qloop_53*(radRayVertex + tmp_qloop_49*tmp_qloop_52);
+                   const real_t tmp_qloop_55 = tmp_qloop_46*tmp_qloop_50;
+                   const real_t tmp_qloop_56 = tmp_qloop_53*(radRayVertex + tmp_qloop_49*tmp_qloop_52);
+                   const real_t tmp_qloop_57 = tmp_qloop_44*tmp_qloop_46;
+                   const real_t tmp_qloop_58 = abs_det_jac_affine_GRAY*abs((tmp_qloop_1*tmp_qloop_51 - tmp_qloop_56*tmp_qloop_57)*(tmp_qloop_13*tmp_qloop_55 + tmp_qloop_54*tmp_qloop_57) - (tmp_qloop_1*tmp_qloop_55 + tmp_qloop_45*tmp_qloop_56)*(tmp_qloop_13*tmp_qloop_51 - tmp_qloop_47*tmp_qloop_54))*_data_q_w[q];
+                   const real_t q_tmp_0_0 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_42));
+                   const real_t q_tmp_1_1 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_42));
+                   const real_t q_tmp_2_2 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_42));
+                   const real_t q_tmp_3_3 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_42));
+                   const real_t q_tmp_4_4 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_42));
+                   const real_t q_tmp_5_5 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_42));
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatDiag_0 = q_acc_0_0;
+                const real_t elMatDiag_1 = q_acc_1_1;
+                const real_t elMatDiag_2 = q_acc_2_2;
+                const real_t elMatDiag_3 = q_acc_3_3;
+                const real_t elMatDiag_4 = q_acc_4_4;
+                const real_t elMatDiag_5 = q_acc_5_5;
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q])),_mm256_mul_pd(_mm256_mul_pd(mu_dof_1,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_2,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_3,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_4,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_5,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5])));
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_8 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_5,tmp_qloop_5);
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9);
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_11)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_5);
+                   const __m256d tmp_qloop_20 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_11),_mm256_mul_pd(tmp_qloop_11,tmp_qloop_11));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_8),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_5),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_21),tmp_qloop_9),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_8);
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_21),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_24));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_21),tmp_qloop_5),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,tmp_qloop_5),tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_28 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_25),_mm256_mul_pd(tmp_qloop_26,tmp_qloop_27)));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_29);
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_31);
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_29);
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_29);
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q])),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q])),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q]))));
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_36);
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_36);
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_36);
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36);
+                   const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q])));
+                   const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q])));
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_44 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_44,tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_46);
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(tmp_qloop_45,tmp_qloop_47);
+                   const __m256d tmp_qloop_50 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_48)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_49,tmp_qloop_49,tmp_qloop_49,tmp_qloop_49));
+                   const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_44,tmp_qloop_50);
+                   const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_46),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_44),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)));
+                   const __m256d tmp_qloop_53 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_48),_mm256_mul_pd(tmp_qloop_48,tmp_qloop_48)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_set_pd(tmp_qloop_49,tmp_qloop_49,tmp_qloop_49,tmp_qloop_49)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_50);
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_set_pd(tmp_qloop_49,tmp_qloop_49,tmp_qloop_49,tmp_qloop_49)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_44,tmp_qloop_46);
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q]),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_51,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_56,tmp_qloop_57),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_57))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_56)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_51,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_47,tmp_qloop_54),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BL
UE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q]))),tmp_qloop_42))),tmp_qloop_35));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 
1])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1]))),tmp_qloop_42))),tmp_qloop_35));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 
2])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2]))),tmp_qloop_42))),tmp_qloop_35));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 
3])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3]))),tmp_qloop_42))),tmp_qloop_35));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 
4])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4]))),tmp_qloop_42))),tmp_qloop_35));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 
5])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5]))),tmp_qloop_42))),tmp_qloop_35));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatDiag_0 = q_acc_0_0;
+                const __m256d elMatDiag_1 = q_acc_1_1;
+                const __m256d elMatDiag_2 = q_acc_2_2;
+                const __m256d elMatDiag_3 = q_acc_3_3;
+                const __m256d elMatDiag_4 = q_acc_4_4;
+                const __m256d elMatDiag_5 = q_acc_5_5;
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_4 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_qloop_7 = p_affine_0_0 - p_affine_2_0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_0 = mu_dof_0*2.0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*2.0*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_BLUE[6*q + 5];
+                   const real_t tmp_qloop_5 = p_affine_0_1 - tmp_qloop_3*_data_q_p_0[q] - tmp_qloop_4*_data_q_p_1[q];
+                   const real_t tmp_qloop_8 = p_affine_0_0 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                   const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                   const real_t tmp_qloop_10 = (tmp_qloop_5*tmp_qloop_5);
+                   const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9;
+                   const real_t tmp_qloop_18 = pow(tmp_qloop_11, -0.50000000000000000)*tmp_qloop_17*1.0;
+                   const real_t tmp_qloop_19 = tmp_qloop_18*tmp_qloop_5;
+                   const real_t tmp_qloop_20 = pow(tmp_qloop_11, -1.5000000000000000);
+                   const real_t tmp_qloop_21 = radRayVertex + tmp_qloop_17*(tmp_qloop_14*(-rayVertex_0 + tmp_qloop_8) - tmp_qloop_2*(-rayVertex_1 + tmp_qloop_5));
+                   const real_t tmp_qloop_22 = -tmp_qloop_19*tmp_qloop_2 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_9*1.0;
+                   const real_t tmp_qloop_23 = tmp_qloop_18*tmp_qloop_8;
+                   const real_t tmp_qloop_24 = tmp_qloop_20*tmp_qloop_21*1.0;
+                   const real_t tmp_qloop_25 = tmp_qloop_10*tmp_qloop_24 + tmp_qloop_14*tmp_qloop_23;
+                   const real_t tmp_qloop_26 = tmp_qloop_2*tmp_qloop_23 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_5*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_27 = tmp_qloop_14*tmp_qloop_19 - tmp_qloop_24*tmp_qloop_5*tmp_qloop_8;
+                   const real_t tmp_qloop_28 = 1.0 / (tmp_qloop_22*tmp_qloop_25 + tmp_qloop_26*tmp_qloop_27);
+                   const real_t tmp_qloop_29 = tmp_qloop_28*1.0;
+                   const real_t tmp_qloop_30 = tmp_qloop_22*tmp_qloop_29;
+                   const real_t tmp_qloop_31 = -tmp_qloop_27;
+                   const real_t tmp_qloop_32 = tmp_qloop_29*tmp_qloop_31;
+                   const real_t tmp_qloop_33 = tmp_qloop_25*tmp_qloop_29;
+                   const real_t tmp_qloop_34 = tmp_qloop_26*tmp_qloop_29;
+                   const real_t tmp_qloop_35 = tmp_qloop_0*(tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q] + tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q])*(tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q] + tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q]);
+                   const real_t tmp_qloop_36 = tmp_qloop_28*0.5;
+                   const real_t tmp_qloop_37 = tmp_qloop_25*tmp_qloop_36;
+                   const real_t tmp_qloop_38 = tmp_qloop_26*tmp_qloop_36;
+                   const real_t tmp_qloop_39 = tmp_qloop_22*tmp_qloop_36;
+                   const real_t tmp_qloop_40 = tmp_qloop_31*tmp_qloop_36;
+                   const real_t tmp_qloop_41 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q] + tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q];
+                   const real_t tmp_qloop_42 = tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q];
+                   const real_t tmp_qloop_43 = tmp_qloop_0*2.0;
+                   const real_t tmp_qloop_44 = -p_affine_0_0 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                   const real_t tmp_qloop_45 = (tmp_qloop_44*tmp_qloop_44);
+                   const real_t tmp_qloop_46 = -p_affine_0_1 + tmp_qloop_3*_data_q_p_0[q] + tmp_qloop_4*_data_q_p_1[q];
+                   const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                   const real_t tmp_qloop_48 = tmp_qloop_45 + tmp_qloop_47;
+                   const real_t tmp_qloop_50 = pow(tmp_qloop_48, -0.50000000000000000)*tmp_qloop_49*1.0;
+                   const real_t tmp_qloop_51 = tmp_qloop_44*tmp_qloop_50;
+                   const real_t tmp_qloop_52 = tmp_qloop_1*(rayVertex_1 + tmp_qloop_46) - tmp_qloop_13*(rayVertex_0 + tmp_qloop_44);
+                   const real_t tmp_qloop_53 = pow(tmp_qloop_48, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_54 = tmp_qloop_53*(radRayVertex + tmp_qloop_49*tmp_qloop_52);
+                   const real_t tmp_qloop_55 = tmp_qloop_46*tmp_qloop_50;
+                   const real_t tmp_qloop_56 = tmp_qloop_53*(radRayVertex + tmp_qloop_49*tmp_qloop_52);
+                   const real_t tmp_qloop_57 = tmp_qloop_44*tmp_qloop_46;
+                   const real_t tmp_qloop_58 = abs_det_jac_affine_BLUE*abs((tmp_qloop_1*tmp_qloop_51 - tmp_qloop_56*tmp_qloop_57)*(tmp_qloop_13*tmp_qloop_55 + tmp_qloop_54*tmp_qloop_57) - (tmp_qloop_1*tmp_qloop_55 + tmp_qloop_45*tmp_qloop_56)*(tmp_qloop_13*tmp_qloop_51 - tmp_qloop_47*tmp_qloop_54))*_data_q_w[q];
+                   const real_t q_tmp_0_0 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_42));
+                   const real_t q_tmp_1_1 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_42));
+                   const real_t q_tmp_2_2 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_42));
+                   const real_t q_tmp_3_3 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_42));
+                   const real_t q_tmp_4_4 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_42));
+                   const real_t q_tmp_5_5 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_42));
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatDiag_0 = q_acc_0_0;
+                const real_t elMatDiag_1 = q_acc_1_1;
+                const real_t elMatDiag_2 = q_acc_2_2;
+                const real_t elMatDiag_3 = q_acc_3_3;
+                const real_t elMatDiag_4 = q_acc_4_4;
+                const real_t elMatDiag_5 = q_acc_5_5;
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ec483e94453986105f195d2b90058bac9e41034f
--- /dev/null
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp
@@ -0,0 +1,1097 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseEpsilonAnnulusMap_0_1.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseEpsilonAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (tmp_qloop_1*tmp_qloop_14 - tmp_qloop_11*tmp_qloop_13);
+       const real_t tmp_qloop_54 = tmp_qloop_15*1.0 / (-tmp_qloop_0*tmp_qloop_14 + tmp_qloop_11*tmp_qloop_12);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_5 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,tmp_qloop_7);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_4);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(tmp_qloop_8,tmp_qloop_9);
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_10)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16));
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_4);
+                   const __m256d tmp_qloop_19 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_10),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_10));
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_7),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_4),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1))),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_7);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(tmp_qloop_23,tmp_qloop_9));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_24),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_26)));
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_28);
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q])),_mm256_mul_pd(_mm256_mul_pd(mu_dof_1,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_2,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_3,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_4,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_5,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5])));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q]))));
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q]))));
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_28);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_28);
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q]))));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q]))));
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_39);
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_39);
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_39);
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_39);
+                   const __m256d tmp_qloop_44 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q])),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q])));
+                   const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q])),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q])));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q]))),tmp_qloop_46));
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_49);
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_51);
+                   const __m256d tmp_qloop_53 = _mm256_add_pd(tmp_qloop_50,tmp_qloop_52);
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_53)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_54,tmp_qloop_54,tmp_qloop_54,tmp_qloop_54));
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_55);
+                   const __m256d tmp_qloop_57 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_51),_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_49),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)));
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_53),_mm256_mul_pd(tmp_qloop_53,tmp_qloop_53)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_54,tmp_qloop_54,tmp_qloop_54,tmp_qloop_54)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_55);
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_54,tmp_qloop_54,tmp_qloop_54,tmp_qloop_54)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_51);
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q]),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_61,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_60,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_59,tmp_qloop_62))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_60,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(tmp_qloop_50,tmp_qloop_61)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_52,tmp_qloop_59),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1]))));
+                   const __m256d tmp_qloop_65 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2]))));
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_68 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3]))));
+                   const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4]))));
+                   const __m256d tmp_qloop_71 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_72 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5]))));
+                   const __m256d tmp_qloop_73 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_74 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1]))));
+                   const __m256d tmp_qloop_75 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1]))),tmp_qloop_46));
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2]))));
+                   const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2]))),tmp_qloop_46));
+                   const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3]))));
+                   const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3]))),tmp_qloop_46));
+                   const __m256d tmp_qloop_80 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4]))));
+                   const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4]))),tmp_qloop_46));
+                   const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5]))));
+                   const __m256d tmp_qloop_83 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5]))),tmp_qloop_46));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_48),tmp_qloop_34),tmp_qloop_38));
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_65),tmp_qloop_34),tmp_qloop_64));
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_67),tmp_qloop_34),tmp_qloop_66));
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_69),tmp_qloop_34),tmp_qloop_68));
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_71),tmp_qloop_34),tmp_qloop_70));
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_73),tmp_qloop_34),tmp_qloop_72));
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_75),tmp_qloop_38),tmp_qloop_74));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_75),tmp_qloop_64),tmp_qloop_74));
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_75),tmp_qloop_66),tmp_qloop_74));
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_75),tmp_qloop_68),tmp_qloop_74));
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,tmp_qloop_75),tmp_qloop_70),tmp_qloop_74));
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_75),tmp_qloop_72),tmp_qloop_74));
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_77),tmp_qloop_38),tmp_qloop_76));
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_77),tmp_qloop_64),tmp_qloop_76));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_77),tmp_qloop_66),tmp_qloop_76));
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_77),tmp_qloop_68),tmp_qloop_76));
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,tmp_qloop_77),tmp_qloop_70),tmp_qloop_76));
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_77),tmp_qloop_72),tmp_qloop_76));
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_79),tmp_qloop_38),tmp_qloop_78));
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_79),tmp_qloop_64),tmp_qloop_78));
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_79),tmp_qloop_66),tmp_qloop_78));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_79),tmp_qloop_68),tmp_qloop_78));
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,tmp_qloop_79),tmp_qloop_70),tmp_qloop_78));
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_79),tmp_qloop_72),tmp_qloop_78));
+                   const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_81),tmp_qloop_38),tmp_qloop_80));
+                   const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_81),tmp_qloop_64),tmp_qloop_80));
+                   const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_81),tmp_qloop_66),tmp_qloop_80));
+                   const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_81),tmp_qloop_68),tmp_qloop_80));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,tmp_qloop_81),tmp_qloop_70),tmp_qloop_80));
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_81),tmp_qloop_72),tmp_qloop_80));
+                   const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_83),tmp_qloop_38),tmp_qloop_82));
+                   const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_83),tmp_qloop_64),tmp_qloop_82));
+                   const __m256d q_tmp_5_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_83),tmp_qloop_66),tmp_qloop_82));
+                   const __m256d q_tmp_5_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_83),tmp_qloop_68),tmp_qloop_82));
+                   const __m256d q_tmp_5_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,tmp_qloop_83),tmp_qloop_70),tmp_qloop_82));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_83),tmp_qloop_72),tmp_qloop_82));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0);
+                   q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1);
+                   q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2);
+                   q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0);
+                   q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1);
+                   q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2);
+                   q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3);
+                   q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_0 = 0.0;
+                real_t q_acc_4_1 = 0.0;
+                real_t q_acc_4_2 = 0.0;
+                real_t q_acc_4_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_0 = 0.0;
+                real_t q_acc_5_1 = 0.0;
+                real_t q_acc_5_2 = 0.0;
+                real_t q_acc_5_3 = 0.0;
+                real_t q_acc_5_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                   const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                   const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                   const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                   const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                   const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                   const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                   const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                   const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                   const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                   const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                   const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                   const real_t tmp_qloop_28 = tmp_qloop_27*1.0;
+                   const real_t tmp_qloop_29 = tmp_qloop_21*tmp_qloop_28;
+                   const real_t tmp_qloop_30 = -tmp_qloop_26;
+                   const real_t tmp_qloop_31 = tmp_qloop_28*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = mu_dof_0*2.0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*2.0*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_GRAY[6*q + 5];
+                   const real_t tmp_qloop_33 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q]);
+                   const real_t tmp_qloop_34 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q]);
+                   const real_t tmp_qloop_35 = tmp_qloop_24*tmp_qloop_28;
+                   const real_t tmp_qloop_36 = tmp_qloop_25*tmp_qloop_28;
+                   const real_t tmp_qloop_37 = tmp_qloop_32*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q]);
+                   const real_t tmp_qloop_38 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q]);
+                   const real_t tmp_qloop_39 = tmp_qloop_27*0.5;
+                   const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39;
+                   const real_t tmp_qloop_41 = tmp_qloop_30*tmp_qloop_39;
+                   const real_t tmp_qloop_42 = tmp_qloop_24*tmp_qloop_39;
+                   const real_t tmp_qloop_43 = tmp_qloop_25*tmp_qloop_39;
+                   const real_t tmp_qloop_44 = tmp_qloop_42*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q];
+                   const real_t tmp_qloop_45 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_44;
+                   const real_t tmp_qloop_46 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q];
+                   const real_t tmp_qloop_47 = tmp_qloop_32*2.0;
+                   const real_t tmp_qloop_48 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_46);
+                   const real_t tmp_qloop_49 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_50 = (tmp_qloop_49*tmp_qloop_49);
+                   const real_t tmp_qloop_51 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_52 = (tmp_qloop_51*tmp_qloop_51);
+                   const real_t tmp_qloop_53 = tmp_qloop_50 + tmp_qloop_52;
+                   const real_t tmp_qloop_55 = pow(tmp_qloop_53, -0.50000000000000000)*tmp_qloop_54*1.0;
+                   const real_t tmp_qloop_56 = tmp_qloop_49*tmp_qloop_55;
+                   const real_t tmp_qloop_57 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_51) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_49);
+                   const real_t tmp_qloop_58 = pow(tmp_qloop_53, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_59 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                   const real_t tmp_qloop_60 = tmp_qloop_51*tmp_qloop_55;
+                   const real_t tmp_qloop_61 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                   const real_t tmp_qloop_62 = tmp_qloop_49*tmp_qloop_51;
+                   const real_t tmp_qloop_63 = abs_det_jac_affine_GRAY*abs((tmp_qloop_0*tmp_qloop_56 - tmp_qloop_61*tmp_qloop_62)*(tmp_qloop_12*tmp_qloop_60 + tmp_qloop_59*tmp_qloop_62) - (tmp_qloop_0*tmp_qloop_60 + tmp_qloop_50*tmp_qloop_61)*(tmp_qloop_12*tmp_qloop_56 - tmp_qloop_52*tmp_qloop_59))*_data_q_w[q];
+                   const real_t tmp_qloop_64 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1]);
+                   const real_t tmp_qloop_65 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_44;
+                   const real_t tmp_qloop_66 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2]);
+                   const real_t tmp_qloop_67 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_44;
+                   const real_t tmp_qloop_68 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3]);
+                   const real_t tmp_qloop_69 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_44;
+                   const real_t tmp_qloop_70 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4]);
+                   const real_t tmp_qloop_71 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_44;
+                   const real_t tmp_qloop_72 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5]);
+                   const real_t tmp_qloop_73 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_44;
+                   const real_t tmp_qloop_74 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1]);
+                   const real_t tmp_qloop_75 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_46);
+                   const real_t tmp_qloop_76 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2]);
+                   const real_t tmp_qloop_77 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_46);
+                   const real_t tmp_qloop_78 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3]);
+                   const real_t tmp_qloop_79 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_46);
+                   const real_t tmp_qloop_80 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4]);
+                   const real_t tmp_qloop_81 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_46);
+                   const real_t tmp_qloop_82 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5]);
+                   const real_t tmp_qloop_83 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_46);
+                   const real_t q_tmp_0_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_38 + tmp_qloop_45*tmp_qloop_48);
+                   const real_t q_tmp_0_1 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_65 + tmp_qloop_64);
+                   const real_t q_tmp_0_2 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_67 + tmp_qloop_66);
+                   const real_t q_tmp_0_3 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_69 + tmp_qloop_68);
+                   const real_t q_tmp_0_4 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_71 + tmp_qloop_70);
+                   const real_t q_tmp_0_5 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_73 + tmp_qloop_72);
+                   const real_t q_tmp_1_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_1_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_1_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_1_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_1_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_1_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_2_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_2_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_2_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_2_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_2_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_2_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_3_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_3_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_3_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_3_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_3_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_3_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_4_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_4_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_4_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_4_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_4_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_4_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_5_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_83 + tmp_qloop_82);
+                   const real_t q_tmp_5_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_83 + tmp_qloop_82);
+                   const real_t q_tmp_5_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_83 + tmp_qloop_82);
+                   const real_t q_tmp_5_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_83 + tmp_qloop_82);
+                   const real_t q_tmp_5_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_83 + tmp_qloop_82);
+                   const real_t q_tmp_5_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_83 + tmp_qloop_82);
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                   q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                   q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                   q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                   q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                   q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                   q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                   q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_5 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,tmp_qloop_7);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_4);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(tmp_qloop_8,tmp_qloop_9);
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_10)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16));
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_4);
+                   const __m256d tmp_qloop_19 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_10),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_10));
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_7),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_4),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1))),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_7);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(tmp_qloop_23,tmp_qloop_9));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_24),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_26)));
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_28);
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q])),_mm256_mul_pd(_mm256_mul_pd(mu_dof_1,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_2,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_3,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_4,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_5,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5])));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q]))));
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q]))));
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_28);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_28);
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q]))));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q]))));
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_39);
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_39);
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_39);
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_39);
+                   const __m256d tmp_qloop_44 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q])),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q])));
+                   const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q])),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q])));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q]))),tmp_qloop_46));
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_49);
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_51);
+                   const __m256d tmp_qloop_53 = _mm256_add_pd(tmp_qloop_50,tmp_qloop_52);
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_53)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_54,tmp_qloop_54,tmp_qloop_54,tmp_qloop_54));
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_55);
+                   const __m256d tmp_qloop_57 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_51),_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_49),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)));
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_53),_mm256_mul_pd(tmp_qloop_53,tmp_qloop_53)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_54,tmp_qloop_54,tmp_qloop_54,tmp_qloop_54)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_55);
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_54,tmp_qloop_54,tmp_qloop_54,tmp_qloop_54)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_51);
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q]),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_61,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_60,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_59,tmp_qloop_62))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_60,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(tmp_qloop_50,tmp_qloop_61)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_52,tmp_qloop_59),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1]))));
+                   const __m256d tmp_qloop_65 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2]))));
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_68 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3]))));
+                   const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4]))));
+                   const __m256d tmp_qloop_71 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_72 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5]))));
+                   const __m256d tmp_qloop_73 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_74 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1]))));
+                   const __m256d tmp_qloop_75 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1]))),tmp_qloop_46));
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2]))));
+                   const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2]))),tmp_qloop_46));
+                   const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3]))));
+                   const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3]))),tmp_qloop_46));
+                   const __m256d tmp_qloop_80 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4]))));
+                   const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4]))),tmp_qloop_46));
+                   const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5]))));
+                   const __m256d tmp_qloop_83 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5]))),tmp_qloop_46));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_48),tmp_qloop_34),tmp_qloop_38));
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_65),tmp_qloop_34),tmp_qloop_64));
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_67),tmp_qloop_34),tmp_qloop_66));
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_69),tmp_qloop_34),tmp_qloop_68));
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_71),tmp_qloop_34),tmp_qloop_70));
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_73),tmp_qloop_34),tmp_qloop_72));
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_75),tmp_qloop_38),tmp_qloop_74));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_75),tmp_qloop_64),tmp_qloop_74));
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_75),tmp_qloop_66),tmp_qloop_74));
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_75),tmp_qloop_68),tmp_qloop_74));
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,tmp_qloop_75),tmp_qloop_70),tmp_qloop_74));
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_75),tmp_qloop_72),tmp_qloop_74));
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_77),tmp_qloop_38),tmp_qloop_76));
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_77),tmp_qloop_64),tmp_qloop_76));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_77),tmp_qloop_66),tmp_qloop_76));
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_77),tmp_qloop_68),tmp_qloop_76));
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,tmp_qloop_77),tmp_qloop_70),tmp_qloop_76));
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_77),tmp_qloop_72),tmp_qloop_76));
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_79),tmp_qloop_38),tmp_qloop_78));
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_79),tmp_qloop_64),tmp_qloop_78));
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_79),tmp_qloop_66),tmp_qloop_78));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_79),tmp_qloop_68),tmp_qloop_78));
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,tmp_qloop_79),tmp_qloop_70),tmp_qloop_78));
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_79),tmp_qloop_72),tmp_qloop_78));
+                   const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_81),tmp_qloop_38),tmp_qloop_80));
+                   const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_81),tmp_qloop_64),tmp_qloop_80));
+                   const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_81),tmp_qloop_66),tmp_qloop_80));
+                   const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_81),tmp_qloop_68),tmp_qloop_80));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,tmp_qloop_81),tmp_qloop_70),tmp_qloop_80));
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_81),tmp_qloop_72),tmp_qloop_80));
+                   const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_83),tmp_qloop_38),tmp_qloop_82));
+                   const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_83),tmp_qloop_64),tmp_qloop_82));
+                   const __m256d q_tmp_5_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_83),tmp_qloop_66),tmp_qloop_82));
+                   const __m256d q_tmp_5_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_83),tmp_qloop_68),tmp_qloop_82));
+                   const __m256d q_tmp_5_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,tmp_qloop_83),tmp_qloop_70),tmp_qloop_82));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_83),tmp_qloop_72),tmp_qloop_82));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0);
+                   q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1);
+                   q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2);
+                   q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0);
+                   q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1);
+                   q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2);
+                   q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3);
+                   q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_0 = 0.0;
+                real_t q_acc_4_1 = 0.0;
+                real_t q_acc_4_2 = 0.0;
+                real_t q_acc_4_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_0 = 0.0;
+                real_t q_acc_5_1 = 0.0;
+                real_t q_acc_5_2 = 0.0;
+                real_t q_acc_5_3 = 0.0;
+                real_t q_acc_5_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                   const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                   const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                   const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                   const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                   const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                   const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                   const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                   const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                   const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                   const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                   const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                   const real_t tmp_qloop_28 = tmp_qloop_27*1.0;
+                   const real_t tmp_qloop_29 = tmp_qloop_21*tmp_qloop_28;
+                   const real_t tmp_qloop_30 = -tmp_qloop_26;
+                   const real_t tmp_qloop_31 = tmp_qloop_28*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = mu_dof_0*2.0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*2.0*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_BLUE[6*q + 5];
+                   const real_t tmp_qloop_33 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q]);
+                   const real_t tmp_qloop_34 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q]);
+                   const real_t tmp_qloop_35 = tmp_qloop_24*tmp_qloop_28;
+                   const real_t tmp_qloop_36 = tmp_qloop_25*tmp_qloop_28;
+                   const real_t tmp_qloop_37 = tmp_qloop_32*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q]);
+                   const real_t tmp_qloop_38 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q]);
+                   const real_t tmp_qloop_39 = tmp_qloop_27*0.5;
+                   const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39;
+                   const real_t tmp_qloop_41 = tmp_qloop_30*tmp_qloop_39;
+                   const real_t tmp_qloop_42 = tmp_qloop_24*tmp_qloop_39;
+                   const real_t tmp_qloop_43 = tmp_qloop_25*tmp_qloop_39;
+                   const real_t tmp_qloop_44 = tmp_qloop_42*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q];
+                   const real_t tmp_qloop_45 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_44;
+                   const real_t tmp_qloop_46 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q];
+                   const real_t tmp_qloop_47 = tmp_qloop_32*2.0;
+                   const real_t tmp_qloop_48 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_46);
+                   const real_t tmp_qloop_49 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_50 = (tmp_qloop_49*tmp_qloop_49);
+                   const real_t tmp_qloop_51 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_52 = (tmp_qloop_51*tmp_qloop_51);
+                   const real_t tmp_qloop_53 = tmp_qloop_50 + tmp_qloop_52;
+                   const real_t tmp_qloop_55 = pow(tmp_qloop_53, -0.50000000000000000)*tmp_qloop_54*1.0;
+                   const real_t tmp_qloop_56 = tmp_qloop_49*tmp_qloop_55;
+                   const real_t tmp_qloop_57 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_51) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_49);
+                   const real_t tmp_qloop_58 = pow(tmp_qloop_53, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_59 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                   const real_t tmp_qloop_60 = tmp_qloop_51*tmp_qloop_55;
+                   const real_t tmp_qloop_61 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                   const real_t tmp_qloop_62 = tmp_qloop_49*tmp_qloop_51;
+                   const real_t tmp_qloop_63 = abs_det_jac_affine_BLUE*abs((tmp_qloop_0*tmp_qloop_56 - tmp_qloop_61*tmp_qloop_62)*(tmp_qloop_12*tmp_qloop_60 + tmp_qloop_59*tmp_qloop_62) - (tmp_qloop_0*tmp_qloop_60 + tmp_qloop_50*tmp_qloop_61)*(tmp_qloop_12*tmp_qloop_56 - tmp_qloop_52*tmp_qloop_59))*_data_q_w[q];
+                   const real_t tmp_qloop_64 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1]);
+                   const real_t tmp_qloop_65 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_44;
+                   const real_t tmp_qloop_66 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2]);
+                   const real_t tmp_qloop_67 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_44;
+                   const real_t tmp_qloop_68 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3]);
+                   const real_t tmp_qloop_69 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_44;
+                   const real_t tmp_qloop_70 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4]);
+                   const real_t tmp_qloop_71 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_44;
+                   const real_t tmp_qloop_72 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5]);
+                   const real_t tmp_qloop_73 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_44;
+                   const real_t tmp_qloop_74 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1]);
+                   const real_t tmp_qloop_75 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_46);
+                   const real_t tmp_qloop_76 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2]);
+                   const real_t tmp_qloop_77 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_46);
+                   const real_t tmp_qloop_78 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3]);
+                   const real_t tmp_qloop_79 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_46);
+                   const real_t tmp_qloop_80 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4]);
+                   const real_t tmp_qloop_81 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_46);
+                   const real_t tmp_qloop_82 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5]);
+                   const real_t tmp_qloop_83 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_46);
+                   const real_t q_tmp_0_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_38 + tmp_qloop_45*tmp_qloop_48);
+                   const real_t q_tmp_0_1 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_65 + tmp_qloop_64);
+                   const real_t q_tmp_0_2 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_67 + tmp_qloop_66);
+                   const real_t q_tmp_0_3 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_69 + tmp_qloop_68);
+                   const real_t q_tmp_0_4 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_71 + tmp_qloop_70);
+                   const real_t q_tmp_0_5 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_73 + tmp_qloop_72);
+                   const real_t q_tmp_1_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_1_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_1_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_1_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_1_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_1_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_2_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_2_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_2_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_2_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_2_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_2_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_3_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_3_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_3_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_3_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_3_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_3_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_4_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_4_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_4_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_4_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_4_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_4_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_5_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_83 + tmp_qloop_82);
+                   const real_t q_tmp_5_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_83 + tmp_qloop_82);
+                   const real_t q_tmp_5_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_83 + tmp_qloop_82);
+                   const real_t q_tmp_5_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_83 + tmp_qloop_82);
+                   const real_t q_tmp_5_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_83 + tmp_qloop_82);
+                   const real_t q_tmp_5_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_83 + tmp_qloop_82);
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                   q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                   q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                   q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                   q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                   q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                   q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                   q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a2349892bd452be2d3d35d80c8c93b5ae289a911
--- /dev/null
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp
@@ -0,0 +1,1097 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseEpsilonAnnulusMap_1_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseEpsilonAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (tmp_qloop_1*tmp_qloop_14 - tmp_qloop_11*tmp_qloop_13);
+       const real_t tmp_qloop_54 = tmp_qloop_15*1.0 / (-tmp_qloop_0*tmp_qloop_14 + tmp_qloop_11*tmp_qloop_12);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_5 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,tmp_qloop_7);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_4);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(tmp_qloop_8,tmp_qloop_9);
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_10)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16));
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_4);
+                   const __m256d tmp_qloop_19 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_10),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_10));
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_7),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_4),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1))),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_7);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(tmp_qloop_23,tmp_qloop_9));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_24),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_26)));
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_28);
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q])),_mm256_mul_pd(_mm256_mul_pd(mu_dof_1,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_2,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_3,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_4,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_5,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5])));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q]))));
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q]))));
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_28);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_28);
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q]))));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q]))));
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_39);
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_39);
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_39);
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_39);
+                   const __m256d tmp_qloop_44 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q])),_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q])));
+                   const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q])));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q]))),tmp_qloop_46));
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_49);
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_51);
+                   const __m256d tmp_qloop_53 = _mm256_add_pd(tmp_qloop_50,tmp_qloop_52);
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_53)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_54,tmp_qloop_54,tmp_qloop_54,tmp_qloop_54));
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_55);
+                   const __m256d tmp_qloop_57 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_51),_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_49),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)));
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_53),_mm256_mul_pd(tmp_qloop_53,tmp_qloop_53)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_54,tmp_qloop_54,tmp_qloop_54,tmp_qloop_54)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_55);
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_54,tmp_qloop_54,tmp_qloop_54,tmp_qloop_54)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_51);
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q]),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_61,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_60,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_59,tmp_qloop_62))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_60,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(tmp_qloop_50,tmp_qloop_61)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_52,tmp_qloop_59),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1]))));
+                   const __m256d tmp_qloop_65 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2]))));
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_68 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3]))));
+                   const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4]))));
+                   const __m256d tmp_qloop_71 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_72 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5]))));
+                   const __m256d tmp_qloop_73 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_74 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1]))));
+                   const __m256d tmp_qloop_75 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1]))),tmp_qloop_46));
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2]))));
+                   const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2]))),tmp_qloop_46));
+                   const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3]))));
+                   const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3]))),tmp_qloop_46));
+                   const __m256d tmp_qloop_80 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4]))));
+                   const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4]))),tmp_qloop_46));
+                   const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5]))));
+                   const __m256d tmp_qloop_83 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5]))),tmp_qloop_46));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_48),tmp_qloop_34),tmp_qloop_38));
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_65),tmp_qloop_38),tmp_qloop_64));
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_67),tmp_qloop_38),tmp_qloop_66));
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_69),tmp_qloop_38),tmp_qloop_68));
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_71),tmp_qloop_38),tmp_qloop_70));
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_73),tmp_qloop_38),tmp_qloop_72));
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_75),tmp_qloop_34),tmp_qloop_74));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_75),tmp_qloop_64),tmp_qloop_74));
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_75),tmp_qloop_66),tmp_qloop_74));
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_75),tmp_qloop_68),tmp_qloop_74));
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,tmp_qloop_75),tmp_qloop_70),tmp_qloop_74));
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_75),tmp_qloop_72),tmp_qloop_74));
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_77),tmp_qloop_34),tmp_qloop_76));
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_77),tmp_qloop_64),tmp_qloop_76));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_77),tmp_qloop_66),tmp_qloop_76));
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_77),tmp_qloop_68),tmp_qloop_76));
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,tmp_qloop_77),tmp_qloop_70),tmp_qloop_76));
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_77),tmp_qloop_72),tmp_qloop_76));
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_79),tmp_qloop_34),tmp_qloop_78));
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_79),tmp_qloop_64),tmp_qloop_78));
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_79),tmp_qloop_66),tmp_qloop_78));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_79),tmp_qloop_68),tmp_qloop_78));
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,tmp_qloop_79),tmp_qloop_70),tmp_qloop_78));
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_79),tmp_qloop_72),tmp_qloop_78));
+                   const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_81),tmp_qloop_34),tmp_qloop_80));
+                   const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_81),tmp_qloop_64),tmp_qloop_80));
+                   const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_81),tmp_qloop_66),tmp_qloop_80));
+                   const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_81),tmp_qloop_68),tmp_qloop_80));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,tmp_qloop_81),tmp_qloop_70),tmp_qloop_80));
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_81),tmp_qloop_72),tmp_qloop_80));
+                   const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_83),tmp_qloop_34),tmp_qloop_82));
+                   const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_83),tmp_qloop_64),tmp_qloop_82));
+                   const __m256d q_tmp_5_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_83),tmp_qloop_66),tmp_qloop_82));
+                   const __m256d q_tmp_5_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_83),tmp_qloop_68),tmp_qloop_82));
+                   const __m256d q_tmp_5_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,tmp_qloop_83),tmp_qloop_70),tmp_qloop_82));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_83),tmp_qloop_72),tmp_qloop_82));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0);
+                   q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1);
+                   q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2);
+                   q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0);
+                   q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1);
+                   q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2);
+                   q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3);
+                   q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_0 = 0.0;
+                real_t q_acc_4_1 = 0.0;
+                real_t q_acc_4_2 = 0.0;
+                real_t q_acc_4_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_0 = 0.0;
+                real_t q_acc_5_1 = 0.0;
+                real_t q_acc_5_2 = 0.0;
+                real_t q_acc_5_3 = 0.0;
+                real_t q_acc_5_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                   const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                   const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                   const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                   const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                   const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                   const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                   const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                   const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                   const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                   const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                   const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                   const real_t tmp_qloop_28 = tmp_qloop_27*1.0;
+                   const real_t tmp_qloop_29 = tmp_qloop_21*tmp_qloop_28;
+                   const real_t tmp_qloop_30 = -tmp_qloop_26;
+                   const real_t tmp_qloop_31 = tmp_qloop_28*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = mu_dof_0*2.0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*2.0*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_GRAY[6*q + 5];
+                   const real_t tmp_qloop_33 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q]);
+                   const real_t tmp_qloop_34 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q]);
+                   const real_t tmp_qloop_35 = tmp_qloop_24*tmp_qloop_28;
+                   const real_t tmp_qloop_36 = tmp_qloop_25*tmp_qloop_28;
+                   const real_t tmp_qloop_37 = tmp_qloop_32*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q]);
+                   const real_t tmp_qloop_38 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q]);
+                   const real_t tmp_qloop_39 = tmp_qloop_27*0.5;
+                   const real_t tmp_qloop_40 = tmp_qloop_24*tmp_qloop_39;
+                   const real_t tmp_qloop_41 = tmp_qloop_25*tmp_qloop_39;
+                   const real_t tmp_qloop_42 = tmp_qloop_21*tmp_qloop_39;
+                   const real_t tmp_qloop_43 = tmp_qloop_30*tmp_qloop_39;
+                   const real_t tmp_qloop_44 = tmp_qloop_42*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q];
+                   const real_t tmp_qloop_45 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_44;
+                   const real_t tmp_qloop_46 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q];
+                   const real_t tmp_qloop_47 = tmp_qloop_32*2.0;
+                   const real_t tmp_qloop_48 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_46);
+                   const real_t tmp_qloop_49 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_50 = (tmp_qloop_49*tmp_qloop_49);
+                   const real_t tmp_qloop_51 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_52 = (tmp_qloop_51*tmp_qloop_51);
+                   const real_t tmp_qloop_53 = tmp_qloop_50 + tmp_qloop_52;
+                   const real_t tmp_qloop_55 = pow(tmp_qloop_53, -0.50000000000000000)*tmp_qloop_54*1.0;
+                   const real_t tmp_qloop_56 = tmp_qloop_49*tmp_qloop_55;
+                   const real_t tmp_qloop_57 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_51) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_49);
+                   const real_t tmp_qloop_58 = pow(tmp_qloop_53, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_59 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                   const real_t tmp_qloop_60 = tmp_qloop_51*tmp_qloop_55;
+                   const real_t tmp_qloop_61 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                   const real_t tmp_qloop_62 = tmp_qloop_49*tmp_qloop_51;
+                   const real_t tmp_qloop_63 = abs_det_jac_affine_GRAY*abs((tmp_qloop_0*tmp_qloop_56 - tmp_qloop_61*tmp_qloop_62)*(tmp_qloop_12*tmp_qloop_60 + tmp_qloop_59*tmp_qloop_62) - (tmp_qloop_0*tmp_qloop_60 + tmp_qloop_50*tmp_qloop_61)*(tmp_qloop_12*tmp_qloop_56 - tmp_qloop_52*tmp_qloop_59))*_data_q_w[q];
+                   const real_t tmp_qloop_64 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1]);
+                   const real_t tmp_qloop_65 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_44;
+                   const real_t tmp_qloop_66 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2]);
+                   const real_t tmp_qloop_67 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_44;
+                   const real_t tmp_qloop_68 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3]);
+                   const real_t tmp_qloop_69 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_44;
+                   const real_t tmp_qloop_70 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4]);
+                   const real_t tmp_qloop_71 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_44;
+                   const real_t tmp_qloop_72 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5]);
+                   const real_t tmp_qloop_73 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_44;
+                   const real_t tmp_qloop_74 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1]);
+                   const real_t tmp_qloop_75 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_46);
+                   const real_t tmp_qloop_76 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2]);
+                   const real_t tmp_qloop_77 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_46);
+                   const real_t tmp_qloop_78 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3]);
+                   const real_t tmp_qloop_79 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_46);
+                   const real_t tmp_qloop_80 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4]);
+                   const real_t tmp_qloop_81 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_46);
+                   const real_t tmp_qloop_82 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5]);
+                   const real_t tmp_qloop_83 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_46);
+                   const real_t q_tmp_0_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_38 + tmp_qloop_45*tmp_qloop_48);
+                   const real_t q_tmp_0_1 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_65 + tmp_qloop_64);
+                   const real_t q_tmp_0_2 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_67 + tmp_qloop_66);
+                   const real_t q_tmp_0_3 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_69 + tmp_qloop_68);
+                   const real_t q_tmp_0_4 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_71 + tmp_qloop_70);
+                   const real_t q_tmp_0_5 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_73 + tmp_qloop_72);
+                   const real_t q_tmp_1_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_1_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_1_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_1_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_1_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_1_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_2_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_2_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_2_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_2_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_2_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_2_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_3_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_3_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_3_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_3_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_3_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_3_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_4_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_4_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_4_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_4_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_4_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_4_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_5_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_83 + tmp_qloop_82);
+                   const real_t q_tmp_5_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_83 + tmp_qloop_82);
+                   const real_t q_tmp_5_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_83 + tmp_qloop_82);
+                   const real_t q_tmp_5_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_83 + tmp_qloop_82);
+                   const real_t q_tmp_5_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_83 + tmp_qloop_82);
+                   const real_t q_tmp_5_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_83 + tmp_qloop_82);
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                   q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                   q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                   q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                   q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                   q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                   q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                   q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_5 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,tmp_qloop_7);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_4);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(tmp_qloop_8,tmp_qloop_9);
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_10)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16));
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_4);
+                   const __m256d tmp_qloop_19 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_10),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_10));
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_7),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_4),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1))),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_7);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(tmp_qloop_23,tmp_qloop_9));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_24),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_26)));
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_28);
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q])),_mm256_mul_pd(_mm256_mul_pd(mu_dof_1,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_2,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_3,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_4,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_5,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5])));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q]))));
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q]))));
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_28);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_28);
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q]))));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q]))));
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_39);
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_39);
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_39);
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_39);
+                   const __m256d tmp_qloop_44 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q])),_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q])));
+                   const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q])));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q]))),tmp_qloop_46));
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_49);
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_51);
+                   const __m256d tmp_qloop_53 = _mm256_add_pd(tmp_qloop_50,tmp_qloop_52);
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_53)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_54,tmp_qloop_54,tmp_qloop_54,tmp_qloop_54));
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_55);
+                   const __m256d tmp_qloop_57 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_51),_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_49),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)));
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_53),_mm256_mul_pd(tmp_qloop_53,tmp_qloop_53)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_54,tmp_qloop_54,tmp_qloop_54,tmp_qloop_54)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_55);
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_54,tmp_qloop_54,tmp_qloop_54,tmp_qloop_54)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_51);
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q]),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_61,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_60,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_59,tmp_qloop_62))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_60,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(tmp_qloop_50,tmp_qloop_61)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_52,tmp_qloop_59),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1]))));
+                   const __m256d tmp_qloop_65 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2]))));
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_68 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3]))));
+                   const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4]))));
+                   const __m256d tmp_qloop_71 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_72 = _mm256_mul_pd(tmp_qloop_33,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5]))));
+                   const __m256d tmp_qloop_73 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_74 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1]))));
+                   const __m256d tmp_qloop_75 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1]))),tmp_qloop_46));
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2]))));
+                   const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2]))),tmp_qloop_46));
+                   const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3]))));
+                   const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3]))),tmp_qloop_46));
+                   const __m256d tmp_qloop_80 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4]))));
+                   const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4]))),tmp_qloop_46));
+                   const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5]))));
+                   const __m256d tmp_qloop_83 = _mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5]))),tmp_qloop_46));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_48),tmp_qloop_34),tmp_qloop_38));
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_65),tmp_qloop_38),tmp_qloop_64));
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_67),tmp_qloop_38),tmp_qloop_66));
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_69),tmp_qloop_38),tmp_qloop_68));
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_71),tmp_qloop_38),tmp_qloop_70));
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_73),tmp_qloop_38),tmp_qloop_72));
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_75),tmp_qloop_34),tmp_qloop_74));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_75),tmp_qloop_64),tmp_qloop_74));
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_75),tmp_qloop_66),tmp_qloop_74));
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_75),tmp_qloop_68),tmp_qloop_74));
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,tmp_qloop_75),tmp_qloop_70),tmp_qloop_74));
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_75),tmp_qloop_72),tmp_qloop_74));
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_77),tmp_qloop_34),tmp_qloop_76));
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_77),tmp_qloop_64),tmp_qloop_76));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_77),tmp_qloop_66),tmp_qloop_76));
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_77),tmp_qloop_68),tmp_qloop_76));
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,tmp_qloop_77),tmp_qloop_70),tmp_qloop_76));
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_77),tmp_qloop_72),tmp_qloop_76));
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_79),tmp_qloop_34),tmp_qloop_78));
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_79),tmp_qloop_64),tmp_qloop_78));
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_79),tmp_qloop_66),tmp_qloop_78));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_79),tmp_qloop_68),tmp_qloop_78));
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,tmp_qloop_79),tmp_qloop_70),tmp_qloop_78));
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_79),tmp_qloop_72),tmp_qloop_78));
+                   const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_81),tmp_qloop_34),tmp_qloop_80));
+                   const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_81),tmp_qloop_64),tmp_qloop_80));
+                   const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_81),tmp_qloop_66),tmp_qloop_80));
+                   const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_81),tmp_qloop_68),tmp_qloop_80));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,tmp_qloop_81),tmp_qloop_70),tmp_qloop_80));
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_81),tmp_qloop_72),tmp_qloop_80));
+                   const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_83),tmp_qloop_34),tmp_qloop_82));
+                   const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_83),tmp_qloop_64),tmp_qloop_82));
+                   const __m256d q_tmp_5_2 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_83),tmp_qloop_66),tmp_qloop_82));
+                   const __m256d q_tmp_5_3 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_83),tmp_qloop_68),tmp_qloop_82));
+                   const __m256d q_tmp_5_4 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,tmp_qloop_83),tmp_qloop_70),tmp_qloop_82));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_63,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_83),tmp_qloop_72),tmp_qloop_82));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0);
+                   q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1);
+                   q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2);
+                   q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0);
+                   q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1);
+                   q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2);
+                   q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3);
+                   q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_0 = 0.0;
+                real_t q_acc_4_1 = 0.0;
+                real_t q_acc_4_2 = 0.0;
+                real_t q_acc_4_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_0 = 0.0;
+                real_t q_acc_5_1 = 0.0;
+                real_t q_acc_5_2 = 0.0;
+                real_t q_acc_5_3 = 0.0;
+                real_t q_acc_5_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                   const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                   const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                   const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                   const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                   const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                   const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                   const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                   const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                   const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                   const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                   const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                   const real_t tmp_qloop_28 = tmp_qloop_27*1.0;
+                   const real_t tmp_qloop_29 = tmp_qloop_21*tmp_qloop_28;
+                   const real_t tmp_qloop_30 = -tmp_qloop_26;
+                   const real_t tmp_qloop_31 = tmp_qloop_28*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = mu_dof_0*2.0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*2.0*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_BLUE[6*q + 5];
+                   const real_t tmp_qloop_33 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q]);
+                   const real_t tmp_qloop_34 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q]);
+                   const real_t tmp_qloop_35 = tmp_qloop_24*tmp_qloop_28;
+                   const real_t tmp_qloop_36 = tmp_qloop_25*tmp_qloop_28;
+                   const real_t tmp_qloop_37 = tmp_qloop_32*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q]);
+                   const real_t tmp_qloop_38 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q]);
+                   const real_t tmp_qloop_39 = tmp_qloop_27*0.5;
+                   const real_t tmp_qloop_40 = tmp_qloop_24*tmp_qloop_39;
+                   const real_t tmp_qloop_41 = tmp_qloop_25*tmp_qloop_39;
+                   const real_t tmp_qloop_42 = tmp_qloop_21*tmp_qloop_39;
+                   const real_t tmp_qloop_43 = tmp_qloop_30*tmp_qloop_39;
+                   const real_t tmp_qloop_44 = tmp_qloop_42*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q];
+                   const real_t tmp_qloop_45 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_44;
+                   const real_t tmp_qloop_46 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q];
+                   const real_t tmp_qloop_47 = tmp_qloop_32*2.0;
+                   const real_t tmp_qloop_48 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_46);
+                   const real_t tmp_qloop_49 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_50 = (tmp_qloop_49*tmp_qloop_49);
+                   const real_t tmp_qloop_51 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_52 = (tmp_qloop_51*tmp_qloop_51);
+                   const real_t tmp_qloop_53 = tmp_qloop_50 + tmp_qloop_52;
+                   const real_t tmp_qloop_55 = pow(tmp_qloop_53, -0.50000000000000000)*tmp_qloop_54*1.0;
+                   const real_t tmp_qloop_56 = tmp_qloop_49*tmp_qloop_55;
+                   const real_t tmp_qloop_57 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_51) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_49);
+                   const real_t tmp_qloop_58 = pow(tmp_qloop_53, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_59 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                   const real_t tmp_qloop_60 = tmp_qloop_51*tmp_qloop_55;
+                   const real_t tmp_qloop_61 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                   const real_t tmp_qloop_62 = tmp_qloop_49*tmp_qloop_51;
+                   const real_t tmp_qloop_63 = abs_det_jac_affine_BLUE*abs((tmp_qloop_0*tmp_qloop_56 - tmp_qloop_61*tmp_qloop_62)*(tmp_qloop_12*tmp_qloop_60 + tmp_qloop_59*tmp_qloop_62) - (tmp_qloop_0*tmp_qloop_60 + tmp_qloop_50*tmp_qloop_61)*(tmp_qloop_12*tmp_qloop_56 - tmp_qloop_52*tmp_qloop_59))*_data_q_w[q];
+                   const real_t tmp_qloop_64 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1]);
+                   const real_t tmp_qloop_65 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_44;
+                   const real_t tmp_qloop_66 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2]);
+                   const real_t tmp_qloop_67 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_44;
+                   const real_t tmp_qloop_68 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3]);
+                   const real_t tmp_qloop_69 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_44;
+                   const real_t tmp_qloop_70 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4]);
+                   const real_t tmp_qloop_71 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_44;
+                   const real_t tmp_qloop_72 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5]);
+                   const real_t tmp_qloop_73 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_44;
+                   const real_t tmp_qloop_74 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1]);
+                   const real_t tmp_qloop_75 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_46);
+                   const real_t tmp_qloop_76 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2]);
+                   const real_t tmp_qloop_77 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_46);
+                   const real_t tmp_qloop_78 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3]);
+                   const real_t tmp_qloop_79 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_46);
+                   const real_t tmp_qloop_80 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4]);
+                   const real_t tmp_qloop_81 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_46);
+                   const real_t tmp_qloop_82 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5]);
+                   const real_t tmp_qloop_83 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_46);
+                   const real_t q_tmp_0_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_38 + tmp_qloop_45*tmp_qloop_48);
+                   const real_t q_tmp_0_1 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_65 + tmp_qloop_64);
+                   const real_t q_tmp_0_2 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_67 + tmp_qloop_66);
+                   const real_t q_tmp_0_3 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_69 + tmp_qloop_68);
+                   const real_t q_tmp_0_4 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_71 + tmp_qloop_70);
+                   const real_t q_tmp_0_5 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_73 + tmp_qloop_72);
+                   const real_t q_tmp_1_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_1_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_1_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_1_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_1_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_1_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_75 + tmp_qloop_74);
+                   const real_t q_tmp_2_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_2_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_2_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_2_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_2_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_2_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_3_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_3_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_3_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_3_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_3_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_3_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_4_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_4_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_4_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_4_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_4_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_4_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_81 + tmp_qloop_80);
+                   const real_t q_tmp_5_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_83 + tmp_qloop_82);
+                   const real_t q_tmp_5_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_83 + tmp_qloop_82);
+                   const real_t q_tmp_5_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_83 + tmp_qloop_82);
+                   const real_t q_tmp_5_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_83 + tmp_qloop_82);
+                   const real_t q_tmp_5_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_83 + tmp_qloop_82);
+                   const real_t q_tmp_5_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_83 + tmp_qloop_82);
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                   q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                   q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                   q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                   q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                   q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                   q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                   q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..62e06fe5d1f7a7676f34fb9f35ad43d10e74a23d
--- /dev/null
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp
@@ -0,0 +1,899 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseEpsilonAnnulusMap_1_1.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseEpsilonAnnulusMap_1_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_2 = -tmp_qloop_1;
+       const real_t tmp_qloop_12 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_13 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_14 = -tmp_qloop_13;
+       const real_t tmp_qloop_15 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_16 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_17 = -tmp_qloop_16*1.0 / (-tmp_qloop_12*tmp_qloop_14 + tmp_qloop_15*tmp_qloop_2);
+       const real_t tmp_qloop_51 = tmp_qloop_16*1.0 / (-tmp_qloop_1*tmp_qloop_15 + tmp_qloop_12*tmp_qloop_13);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q])),_mm256_mul_pd(_mm256_mul_pd(mu_dof_1,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_2,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_3,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_4,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_5,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5])));
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_8 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_5,tmp_qloop_5);
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9);
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_11)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_5);
+                   const __m256d tmp_qloop_20 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_11),_mm256_mul_pd(tmp_qloop_11,tmp_qloop_11));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_8),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_5),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_21),tmp_qloop_9),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_8);
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_21),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_24));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_21),tmp_qloop_5),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,tmp_qloop_5),tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_28 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_25),_mm256_mul_pd(tmp_qloop_26,tmp_qloop_27)));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_29);
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_31);
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q]))));
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_29);
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_29);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q]))));
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_37);
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_37);
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_37);
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_37);
+                   const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q])));
+                   const __m256d tmp_qloop_43 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q])));
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_44,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q]))),tmp_qloop_43));
+                   const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_46);
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_48);
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(tmp_qloop_47,tmp_qloop_49);
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_50)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_51,tmp_qloop_51,tmp_qloop_51,tmp_qloop_51));
+                   const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_52);
+                   const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_48),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_46),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)));
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_50),_mm256_mul_pd(tmp_qloop_50,tmp_qloop_50)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_55,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(tmp_qloop_51,tmp_qloop_51,tmp_qloop_51,tmp_qloop_51)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_52);
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_55,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(tmp_qloop_51,tmp_qloop_51,tmp_qloop_51,tmp_qloop_51)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_48);
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q]),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_58,tmp_qloop_59),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(tmp_qloop_56,tmp_qloop_59))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_58)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_49,tmp_qloop_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d tmp_qloop_61 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1])));
+                   const __m256d tmp_qloop_62 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_63 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2])));
+                   const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_65 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3])));
+                   const __m256d tmp_qloop_66 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4])));
+                   const __m256d tmp_qloop_68 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5])));
+                   const __m256d tmp_qloop_70 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1]))));
+                   const __m256d tmp_qloop_72 = _mm256_mul_pd(tmp_qloop_44,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1]))),tmp_qloop_43));
+                   const __m256d tmp_qloop_73 = _mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2]))));
+                   const __m256d tmp_qloop_74 = _mm256_mul_pd(tmp_qloop_44,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2]))),tmp_qloop_43));
+                   const __m256d tmp_qloop_75 = _mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3]))));
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_44,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3]))),tmp_qloop_43));
+                   const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4]))));
+                   const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_44,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4]))),tmp_qloop_43));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q])))),_mm256_mul_pd(tmp_qloop_45,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q]))),tmp_qloop_42))),tmp_qloop_33));
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,tmp_qloop_61),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_62)),tmp_qloop_33));
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,tmp_qloop_63),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_64)),tmp_qloop_33));
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,tmp_qloop_65),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_66)),tmp_qloop_33));
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,tmp_qloop_67),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_68)),tmp_qloop_33));
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,tmp_qloop_69),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_70)),tmp_qloop_33));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,tmp_qloop_71),_mm256_mul_pd(tmp_qloop_62,tmp_qloop_72)),tmp_qloop_33));
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,tmp_qloop_71),_mm256_mul_pd(tmp_qloop_64,tmp_qloop_72)),tmp_qloop_33));
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_71),_mm256_mul_pd(tmp_qloop_66,tmp_qloop_72)),tmp_qloop_33));
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_71),_mm256_mul_pd(tmp_qloop_68,tmp_qloop_72)),tmp_qloop_33));
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_71),_mm256_mul_pd(tmp_qloop_70,tmp_qloop_72)),tmp_qloop_33));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,tmp_qloop_73),_mm256_mul_pd(tmp_qloop_64,tmp_qloop_74)),tmp_qloop_33));
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_73),_mm256_mul_pd(tmp_qloop_66,tmp_qloop_74)),tmp_qloop_33));
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_73),_mm256_mul_pd(tmp_qloop_68,tmp_qloop_74)),tmp_qloop_33));
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_73),_mm256_mul_pd(tmp_qloop_70,tmp_qloop_74)),tmp_qloop_33));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_75),_mm256_mul_pd(tmp_qloop_66,tmp_qloop_76)),tmp_qloop_33));
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_75),_mm256_mul_pd(tmp_qloop_68,tmp_qloop_76)),tmp_qloop_33));
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_75),_mm256_mul_pd(tmp_qloop_70,tmp_qloop_76)),tmp_qloop_33));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_77),_mm256_mul_pd(tmp_qloop_68,tmp_qloop_78)),tmp_qloop_33));
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_77),_mm256_mul_pd(tmp_qloop_70,tmp_qloop_78)),tmp_qloop_33));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_69),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_44,tmp_qloop_70),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5]))),tmp_qloop_43))),tmp_qloop_33));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_1,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_2,src_dof_0),_mm256_mul_pd(q_acc_1_2,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_3,src_dof_0),_mm256_mul_pd(q_acc_1_3,src_dof_1)),_mm256_mul_pd(q_acc_2_3,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_4,src_dof_0),_mm256_mul_pd(q_acc_1_4,src_dof_1)),_mm256_mul_pd(q_acc_2_4,src_dof_2)),_mm256_mul_pd(q_acc_3_4,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_5,src_dof_0),_mm256_mul_pd(q_acc_1_5,src_dof_1)),_mm256_mul_pd(q_acc_2_5,src_dof_2)),_mm256_mul_pd(q_acc_3_5,src_dof_3)),_mm256_mul_pd(q_acc_4_5,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_4 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_qloop_7 = p_affine_0_0 - p_affine_2_0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_0 = mu_dof_0*2.0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*2.0*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_GRAY[6*q + 5];
+                   const real_t tmp_qloop_5 = p_affine_0_1 - tmp_qloop_3*_data_q_p_0[q] - tmp_qloop_4*_data_q_p_1[q];
+                   const real_t tmp_qloop_8 = p_affine_0_0 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                   const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                   const real_t tmp_qloop_10 = (tmp_qloop_5*tmp_qloop_5);
+                   const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9;
+                   const real_t tmp_qloop_18 = pow(tmp_qloop_11, -0.50000000000000000)*tmp_qloop_17*1.0;
+                   const real_t tmp_qloop_19 = tmp_qloop_18*tmp_qloop_5;
+                   const real_t tmp_qloop_20 = pow(tmp_qloop_11, -1.5000000000000000);
+                   const real_t tmp_qloop_21 = radRayVertex + tmp_qloop_17*(tmp_qloop_14*(-rayVertex_0 + tmp_qloop_8) - tmp_qloop_2*(-rayVertex_1 + tmp_qloop_5));
+                   const real_t tmp_qloop_22 = -tmp_qloop_19*tmp_qloop_2 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_9*1.0;
+                   const real_t tmp_qloop_23 = tmp_qloop_18*tmp_qloop_8;
+                   const real_t tmp_qloop_24 = tmp_qloop_20*tmp_qloop_21*1.0;
+                   const real_t tmp_qloop_25 = tmp_qloop_10*tmp_qloop_24 + tmp_qloop_14*tmp_qloop_23;
+                   const real_t tmp_qloop_26 = tmp_qloop_2*tmp_qloop_23 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_5*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_27 = tmp_qloop_14*tmp_qloop_19 - tmp_qloop_24*tmp_qloop_5*tmp_qloop_8;
+                   const real_t tmp_qloop_28 = 1.0 / (tmp_qloop_22*tmp_qloop_25 + tmp_qloop_26*tmp_qloop_27);
+                   const real_t tmp_qloop_29 = tmp_qloop_28*1.0;
+                   const real_t tmp_qloop_30 = tmp_qloop_22*tmp_qloop_29;
+                   const real_t tmp_qloop_31 = -tmp_qloop_27;
+                   const real_t tmp_qloop_32 = tmp_qloop_29*tmp_qloop_31;
+                   const real_t tmp_qloop_33 = tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q]);
+                   const real_t tmp_qloop_34 = tmp_qloop_25*tmp_qloop_29;
+                   const real_t tmp_qloop_35 = tmp_qloop_26*tmp_qloop_29;
+                   const real_t tmp_qloop_36 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q]);
+                   const real_t tmp_qloop_37 = tmp_qloop_28*0.5;
+                   const real_t tmp_qloop_38 = tmp_qloop_22*tmp_qloop_37;
+                   const real_t tmp_qloop_39 = tmp_qloop_31*tmp_qloop_37;
+                   const real_t tmp_qloop_40 = tmp_qloop_25*tmp_qloop_37;
+                   const real_t tmp_qloop_41 = tmp_qloop_26*tmp_qloop_37;
+                   const real_t tmp_qloop_42 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q];
+                   const real_t tmp_qloop_43 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q];
+                   const real_t tmp_qloop_44 = tmp_qloop_0*2.0;
+                   const real_t tmp_qloop_45 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_43);
+                   const real_t tmp_qloop_46 = -p_affine_0_0 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                   const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                   const real_t tmp_qloop_48 = -p_affine_0_1 + tmp_qloop_3*_data_q_p_0[q] + tmp_qloop_4*_data_q_p_1[q];
+                   const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                   const real_t tmp_qloop_50 = tmp_qloop_47 + tmp_qloop_49;
+                   const real_t tmp_qloop_52 = pow(tmp_qloop_50, -0.50000000000000000)*tmp_qloop_51*1.0;
+                   const real_t tmp_qloop_53 = tmp_qloop_46*tmp_qloop_52;
+                   const real_t tmp_qloop_54 = tmp_qloop_1*(rayVertex_1 + tmp_qloop_48) - tmp_qloop_13*(rayVertex_0 + tmp_qloop_46);
+                   const real_t tmp_qloop_55 = pow(tmp_qloop_50, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_56 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                   const real_t tmp_qloop_57 = tmp_qloop_48*tmp_qloop_52;
+                   const real_t tmp_qloop_58 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                   const real_t tmp_qloop_59 = tmp_qloop_46*tmp_qloop_48;
+                   const real_t tmp_qloop_60 = abs_det_jac_affine_GRAY*abs((tmp_qloop_1*tmp_qloop_53 - tmp_qloop_58*tmp_qloop_59)*(tmp_qloop_13*tmp_qloop_57 + tmp_qloop_56*tmp_qloop_59) - (tmp_qloop_1*tmp_qloop_57 + tmp_qloop_47*tmp_qloop_58)*(tmp_qloop_13*tmp_qloop_53 - tmp_qloop_49*tmp_qloop_56))*_data_q_w[q];
+                   const real_t tmp_qloop_61 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1];
+                   const real_t tmp_qloop_62 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_42;
+                   const real_t tmp_qloop_63 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2];
+                   const real_t tmp_qloop_64 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_42;
+                   const real_t tmp_qloop_65 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3];
+                   const real_t tmp_qloop_66 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_42;
+                   const real_t tmp_qloop_67 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4];
+                   const real_t tmp_qloop_68 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_42;
+                   const real_t tmp_qloop_69 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5];
+                   const real_t tmp_qloop_70 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_42;
+                   const real_t tmp_qloop_71 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1]);
+                   const real_t tmp_qloop_72 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_43);
+                   const real_t tmp_qloop_73 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2]);
+                   const real_t tmp_qloop_74 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_43);
+                   const real_t tmp_qloop_75 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3]);
+                   const real_t tmp_qloop_76 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_43);
+                   const real_t tmp_qloop_77 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4]);
+                   const real_t tmp_qloop_78 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_43);
+                   const real_t q_tmp_0_0 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q]) + tmp_qloop_45*(tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_42));
+                   const real_t q_tmp_0_1 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_61 + tmp_qloop_45*tmp_qloop_62);
+                   const real_t q_tmp_0_2 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_63 + tmp_qloop_45*tmp_qloop_64);
+                   const real_t q_tmp_0_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_65 + tmp_qloop_45*tmp_qloop_66);
+                   const real_t q_tmp_0_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_67 + tmp_qloop_45*tmp_qloop_68);
+                   const real_t q_tmp_0_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_69 + tmp_qloop_45*tmp_qloop_70);
+                   const real_t q_tmp_1_1 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_61*tmp_qloop_71 + tmp_qloop_62*tmp_qloop_72);
+                   const real_t q_tmp_1_2 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_63*tmp_qloop_71 + tmp_qloop_64*tmp_qloop_72);
+                   const real_t q_tmp_1_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_65*tmp_qloop_71 + tmp_qloop_66*tmp_qloop_72);
+                   const real_t q_tmp_1_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_71 + tmp_qloop_68*tmp_qloop_72);
+                   const real_t q_tmp_1_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_71 + tmp_qloop_70*tmp_qloop_72);
+                   const real_t q_tmp_2_2 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_63*tmp_qloop_73 + tmp_qloop_64*tmp_qloop_74);
+                   const real_t q_tmp_2_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_65*tmp_qloop_73 + tmp_qloop_66*tmp_qloop_74);
+                   const real_t q_tmp_2_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_73 + tmp_qloop_68*tmp_qloop_74);
+                   const real_t q_tmp_2_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_73 + tmp_qloop_70*tmp_qloop_74);
+                   const real_t q_tmp_3_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_66*tmp_qloop_76);
+                   const real_t q_tmp_3_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_68*tmp_qloop_76);
+                   const real_t q_tmp_3_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_70*tmp_qloop_76);
+                   const real_t q_tmp_4_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_68*tmp_qloop_78);
+                   const real_t q_tmp_4_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_70*tmp_qloop_78);
+                   const real_t q_tmp_5_5 = tmp_qloop_60*(tmp_qloop_0*tmp_qloop_69*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5]) + tmp_qloop_33 + tmp_qloop_44*tmp_qloop_70*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_43));
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+                const real_t elMatVec_1 = q_acc_0_1*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_0_2*src_dof_0 + q_acc_1_2*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_0_3*src_dof_0 + q_acc_1_3*src_dof_1 + q_acc_2_3*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_0_4*src_dof_0 + q_acc_1_4*src_dof_1 + q_acc_2_4*src_dof_2 + q_acc_3_4*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_0_5*src_dof_0 + q_acc_1_5*src_dof_1 + q_acc_2_5*src_dof_2 + q_acc_3_5*src_dof_3 + q_acc_4_5*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q])),_mm256_mul_pd(_mm256_mul_pd(mu_dof_1,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_2,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_3,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_4,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_5,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5])));
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_8 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_5,tmp_qloop_5);
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9);
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_11)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_5);
+                   const __m256d tmp_qloop_20 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_11),_mm256_mul_pd(tmp_qloop_11,tmp_qloop_11));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_8),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_5),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_21),tmp_qloop_9),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_8);
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_21),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_24));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_21),tmp_qloop_5),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,tmp_qloop_5),tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_28 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_25),_mm256_mul_pd(tmp_qloop_26,tmp_qloop_27)));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_29);
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_31);
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q]))));
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_29);
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_29);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q]))));
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_37);
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_37);
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_37);
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_37);
+                   const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q])));
+                   const __m256d tmp_qloop_43 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q])));
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_44,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q]))),tmp_qloop_43));
+                   const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_46);
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_48);
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(tmp_qloop_47,tmp_qloop_49);
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_50)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_51,tmp_qloop_51,tmp_qloop_51,tmp_qloop_51));
+                   const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_52);
+                   const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_48),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_46),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)));
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_50),_mm256_mul_pd(tmp_qloop_50,tmp_qloop_50)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_55,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(tmp_qloop_51,tmp_qloop_51,tmp_qloop_51,tmp_qloop_51)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_52);
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_55,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(tmp_qloop_51,tmp_qloop_51,tmp_qloop_51,tmp_qloop_51)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_48);
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q]),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_58,tmp_qloop_59),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(tmp_qloop_56,tmp_qloop_59))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_58)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_49,tmp_qloop_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d tmp_qloop_61 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1])));
+                   const __m256d tmp_qloop_62 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_63 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2])));
+                   const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_65 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3])));
+                   const __m256d tmp_qloop_66 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4])));
+                   const __m256d tmp_qloop_68 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5])));
+                   const __m256d tmp_qloop_70 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1]))));
+                   const __m256d tmp_qloop_72 = _mm256_mul_pd(tmp_qloop_44,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1]))),tmp_qloop_43));
+                   const __m256d tmp_qloop_73 = _mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2]))));
+                   const __m256d tmp_qloop_74 = _mm256_mul_pd(tmp_qloop_44,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2]))),tmp_qloop_43));
+                   const __m256d tmp_qloop_75 = _mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3]))));
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_44,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3]))),tmp_qloop_43));
+                   const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4]))));
+                   const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_44,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4]))),tmp_qloop_43));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q])))),_mm256_mul_pd(tmp_qloop_45,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q]))),tmp_qloop_42))),tmp_qloop_33));
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,tmp_qloop_61),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_62)),tmp_qloop_33));
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,tmp_qloop_63),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_64)),tmp_qloop_33));
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,tmp_qloop_65),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_66)),tmp_qloop_33));
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,tmp_qloop_67),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_68)),tmp_qloop_33));
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,tmp_qloop_69),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_70)),tmp_qloop_33));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,tmp_qloop_71),_mm256_mul_pd(tmp_qloop_62,tmp_qloop_72)),tmp_qloop_33));
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,tmp_qloop_71),_mm256_mul_pd(tmp_qloop_64,tmp_qloop_72)),tmp_qloop_33));
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_71),_mm256_mul_pd(tmp_qloop_66,tmp_qloop_72)),tmp_qloop_33));
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_71),_mm256_mul_pd(tmp_qloop_68,tmp_qloop_72)),tmp_qloop_33));
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_71),_mm256_mul_pd(tmp_qloop_70,tmp_qloop_72)),tmp_qloop_33));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,tmp_qloop_73),_mm256_mul_pd(tmp_qloop_64,tmp_qloop_74)),tmp_qloop_33));
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_73),_mm256_mul_pd(tmp_qloop_66,tmp_qloop_74)),tmp_qloop_33));
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_73),_mm256_mul_pd(tmp_qloop_68,tmp_qloop_74)),tmp_qloop_33));
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_73),_mm256_mul_pd(tmp_qloop_70,tmp_qloop_74)),tmp_qloop_33));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_75),_mm256_mul_pd(tmp_qloop_66,tmp_qloop_76)),tmp_qloop_33));
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_75),_mm256_mul_pd(tmp_qloop_68,tmp_qloop_76)),tmp_qloop_33));
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_75),_mm256_mul_pd(tmp_qloop_70,tmp_qloop_76)),tmp_qloop_33));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_77),_mm256_mul_pd(tmp_qloop_68,tmp_qloop_78)),tmp_qloop_33));
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_77),_mm256_mul_pd(tmp_qloop_70,tmp_qloop_78)),tmp_qloop_33));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_69),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_44,tmp_qloop_70),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5]))),tmp_qloop_43))),tmp_qloop_33));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_1,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_2,src_dof_0),_mm256_mul_pd(q_acc_1_2,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_3,src_dof_0),_mm256_mul_pd(q_acc_1_3,src_dof_1)),_mm256_mul_pd(q_acc_2_3,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_4,src_dof_0),_mm256_mul_pd(q_acc_1_4,src_dof_1)),_mm256_mul_pd(q_acc_2_4,src_dof_2)),_mm256_mul_pd(q_acc_3_4,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_5,src_dof_0),_mm256_mul_pd(q_acc_1_5,src_dof_1)),_mm256_mul_pd(q_acc_2_5,src_dof_2)),_mm256_mul_pd(q_acc_3_5,src_dof_3)),_mm256_mul_pd(q_acc_4_5,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_4 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_qloop_7 = p_affine_0_0 - p_affine_2_0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_0 = mu_dof_0*2.0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*2.0*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_BLUE[6*q + 5];
+                   const real_t tmp_qloop_5 = p_affine_0_1 - tmp_qloop_3*_data_q_p_0[q] - tmp_qloop_4*_data_q_p_1[q];
+                   const real_t tmp_qloop_8 = p_affine_0_0 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                   const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                   const real_t tmp_qloop_10 = (tmp_qloop_5*tmp_qloop_5);
+                   const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9;
+                   const real_t tmp_qloop_18 = pow(tmp_qloop_11, -0.50000000000000000)*tmp_qloop_17*1.0;
+                   const real_t tmp_qloop_19 = tmp_qloop_18*tmp_qloop_5;
+                   const real_t tmp_qloop_20 = pow(tmp_qloop_11, -1.5000000000000000);
+                   const real_t tmp_qloop_21 = radRayVertex + tmp_qloop_17*(tmp_qloop_14*(-rayVertex_0 + tmp_qloop_8) - tmp_qloop_2*(-rayVertex_1 + tmp_qloop_5));
+                   const real_t tmp_qloop_22 = -tmp_qloop_19*tmp_qloop_2 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_9*1.0;
+                   const real_t tmp_qloop_23 = tmp_qloop_18*tmp_qloop_8;
+                   const real_t tmp_qloop_24 = tmp_qloop_20*tmp_qloop_21*1.0;
+                   const real_t tmp_qloop_25 = tmp_qloop_10*tmp_qloop_24 + tmp_qloop_14*tmp_qloop_23;
+                   const real_t tmp_qloop_26 = tmp_qloop_2*tmp_qloop_23 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_5*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_27 = tmp_qloop_14*tmp_qloop_19 - tmp_qloop_24*tmp_qloop_5*tmp_qloop_8;
+                   const real_t tmp_qloop_28 = 1.0 / (tmp_qloop_22*tmp_qloop_25 + tmp_qloop_26*tmp_qloop_27);
+                   const real_t tmp_qloop_29 = tmp_qloop_28*1.0;
+                   const real_t tmp_qloop_30 = tmp_qloop_22*tmp_qloop_29;
+                   const real_t tmp_qloop_31 = -tmp_qloop_27;
+                   const real_t tmp_qloop_32 = tmp_qloop_29*tmp_qloop_31;
+                   const real_t tmp_qloop_33 = tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q]);
+                   const real_t tmp_qloop_34 = tmp_qloop_25*tmp_qloop_29;
+                   const real_t tmp_qloop_35 = tmp_qloop_26*tmp_qloop_29;
+                   const real_t tmp_qloop_36 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q]);
+                   const real_t tmp_qloop_37 = tmp_qloop_28*0.5;
+                   const real_t tmp_qloop_38 = tmp_qloop_22*tmp_qloop_37;
+                   const real_t tmp_qloop_39 = tmp_qloop_31*tmp_qloop_37;
+                   const real_t tmp_qloop_40 = tmp_qloop_25*tmp_qloop_37;
+                   const real_t tmp_qloop_41 = tmp_qloop_26*tmp_qloop_37;
+                   const real_t tmp_qloop_42 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q];
+                   const real_t tmp_qloop_43 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q];
+                   const real_t tmp_qloop_44 = tmp_qloop_0*2.0;
+                   const real_t tmp_qloop_45 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_43);
+                   const real_t tmp_qloop_46 = -p_affine_0_0 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                   const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                   const real_t tmp_qloop_48 = -p_affine_0_1 + tmp_qloop_3*_data_q_p_0[q] + tmp_qloop_4*_data_q_p_1[q];
+                   const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                   const real_t tmp_qloop_50 = tmp_qloop_47 + tmp_qloop_49;
+                   const real_t tmp_qloop_52 = pow(tmp_qloop_50, -0.50000000000000000)*tmp_qloop_51*1.0;
+                   const real_t tmp_qloop_53 = tmp_qloop_46*tmp_qloop_52;
+                   const real_t tmp_qloop_54 = tmp_qloop_1*(rayVertex_1 + tmp_qloop_48) - tmp_qloop_13*(rayVertex_0 + tmp_qloop_46);
+                   const real_t tmp_qloop_55 = pow(tmp_qloop_50, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_56 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                   const real_t tmp_qloop_57 = tmp_qloop_48*tmp_qloop_52;
+                   const real_t tmp_qloop_58 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                   const real_t tmp_qloop_59 = tmp_qloop_46*tmp_qloop_48;
+                   const real_t tmp_qloop_60 = abs_det_jac_affine_BLUE*abs((tmp_qloop_1*tmp_qloop_53 - tmp_qloop_58*tmp_qloop_59)*(tmp_qloop_13*tmp_qloop_57 + tmp_qloop_56*tmp_qloop_59) - (tmp_qloop_1*tmp_qloop_57 + tmp_qloop_47*tmp_qloop_58)*(tmp_qloop_13*tmp_qloop_53 - tmp_qloop_49*tmp_qloop_56))*_data_q_w[q];
+                   const real_t tmp_qloop_61 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1];
+                   const real_t tmp_qloop_62 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_42;
+                   const real_t tmp_qloop_63 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2];
+                   const real_t tmp_qloop_64 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_42;
+                   const real_t tmp_qloop_65 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3];
+                   const real_t tmp_qloop_66 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_42;
+                   const real_t tmp_qloop_67 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4];
+                   const real_t tmp_qloop_68 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_42;
+                   const real_t tmp_qloop_69 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5];
+                   const real_t tmp_qloop_70 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_42;
+                   const real_t tmp_qloop_71 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1]);
+                   const real_t tmp_qloop_72 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_43);
+                   const real_t tmp_qloop_73 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2]);
+                   const real_t tmp_qloop_74 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_43);
+                   const real_t tmp_qloop_75 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3]);
+                   const real_t tmp_qloop_76 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_43);
+                   const real_t tmp_qloop_77 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4]);
+                   const real_t tmp_qloop_78 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_43);
+                   const real_t q_tmp_0_0 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q]) + tmp_qloop_45*(tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_42));
+                   const real_t q_tmp_0_1 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_61 + tmp_qloop_45*tmp_qloop_62);
+                   const real_t q_tmp_0_2 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_63 + tmp_qloop_45*tmp_qloop_64);
+                   const real_t q_tmp_0_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_65 + tmp_qloop_45*tmp_qloop_66);
+                   const real_t q_tmp_0_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_67 + tmp_qloop_45*tmp_qloop_68);
+                   const real_t q_tmp_0_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_69 + tmp_qloop_45*tmp_qloop_70);
+                   const real_t q_tmp_1_1 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_61*tmp_qloop_71 + tmp_qloop_62*tmp_qloop_72);
+                   const real_t q_tmp_1_2 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_63*tmp_qloop_71 + tmp_qloop_64*tmp_qloop_72);
+                   const real_t q_tmp_1_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_65*tmp_qloop_71 + tmp_qloop_66*tmp_qloop_72);
+                   const real_t q_tmp_1_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_71 + tmp_qloop_68*tmp_qloop_72);
+                   const real_t q_tmp_1_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_71 + tmp_qloop_70*tmp_qloop_72);
+                   const real_t q_tmp_2_2 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_63*tmp_qloop_73 + tmp_qloop_64*tmp_qloop_74);
+                   const real_t q_tmp_2_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_65*tmp_qloop_73 + tmp_qloop_66*tmp_qloop_74);
+                   const real_t q_tmp_2_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_73 + tmp_qloop_68*tmp_qloop_74);
+                   const real_t q_tmp_2_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_73 + tmp_qloop_70*tmp_qloop_74);
+                   const real_t q_tmp_3_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_66*tmp_qloop_76);
+                   const real_t q_tmp_3_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_68*tmp_qloop_76);
+                   const real_t q_tmp_3_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_70*tmp_qloop_76);
+                   const real_t q_tmp_4_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_68*tmp_qloop_78);
+                   const real_t q_tmp_4_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_70*tmp_qloop_78);
+                   const real_t q_tmp_5_5 = tmp_qloop_60*(tmp_qloop_0*tmp_qloop_69*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5]) + tmp_qloop_33 + tmp_qloop_44*tmp_qloop_70*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_43));
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+                const real_t elMatVec_1 = q_acc_0_1*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_0_2*src_dof_0 + q_acc_1_2*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_0_3*src_dof_0 + q_acc_1_3*src_dof_1 + q_acc_2_3*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_0_4*src_dof_0 + q_acc_1_4*src_dof_1 + q_acc_2_4*src_dof_2 + q_acc_3_4*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_0_5*src_dof_0 + q_acc_1_5*src_dof_1 + q_acc_2_5*src_dof_2 + q_acc_3_5*src_dof_3 + q_acc_4_5*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f8b32fce8a4d9ecac8f22f0c9283a82d268ef388
--- /dev/null
+++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
@@ -0,0 +1,615 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseEpsilonAnnulusMap_1_1.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseEpsilonAnnulusMap_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_2 = -tmp_qloop_1;
+       const real_t tmp_qloop_12 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_13 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_14 = -tmp_qloop_13;
+       const real_t tmp_qloop_15 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_16 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_17 = -tmp_qloop_16*1.0 / (-tmp_qloop_12*tmp_qloop_14 + tmp_qloop_15*tmp_qloop_2);
+       const real_t tmp_qloop_49 = tmp_qloop_16*1.0 / (-tmp_qloop_1*tmp_qloop_15 + tmp_qloop_12*tmp_qloop_13);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q])),_mm256_mul_pd(_mm256_mul_pd(mu_dof_1,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_2,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_3,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_4,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_5,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5])));
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_8 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_5,tmp_qloop_5);
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9);
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_11)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_5);
+                   const __m256d tmp_qloop_20 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_11),_mm256_mul_pd(tmp_qloop_11,tmp_qloop_11));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_8),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_5),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_21),tmp_qloop_9),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_8);
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_21),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_24));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_21),tmp_qloop_5),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,tmp_qloop_5),tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_28 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_25),_mm256_mul_pd(tmp_qloop_26,tmp_qloop_27)));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_29);
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_31);
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q]))));
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_29);
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_29);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_36);
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36);
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_36);
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_36);
+                   const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q])));
+                   const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q])));
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_44 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_44,tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_46);
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(tmp_qloop_45,tmp_qloop_47);
+                   const __m256d tmp_qloop_50 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_48)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_49,tmp_qloop_49,tmp_qloop_49,tmp_qloop_49));
+                   const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_44,tmp_qloop_50);
+                   const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_46),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_44),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)));
+                   const __m256d tmp_qloop_53 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_48),_mm256_mul_pd(tmp_qloop_48,tmp_qloop_48)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_set_pd(tmp_qloop_49,tmp_qloop_49,tmp_qloop_49,tmp_qloop_49)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_50);
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_set_pd(tmp_qloop_49,tmp_qloop_49,tmp_qloop_49,tmp_qloop_49)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_44,tmp_qloop_46);
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q]),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_51,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_56,tmp_qloop_57),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_57))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_56)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_51,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_47,tmp_qloop_54),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GR
AY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q]))),tmp_qloop_42))),tmp_qloop_33));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 
1])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1]))),tmp_qloop_42))),tmp_qloop_33));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 
2])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2]))),tmp_qloop_42))),tmp_qloop_33));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 
3])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3]))),tmp_qloop_42))),tmp_qloop_33));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 
4])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4]))),tmp_qloop_42))),tmp_qloop_33));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 
5])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5]))),tmp_qloop_42))),tmp_qloop_33));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatDiag_0 = q_acc_0_0;
+                const __m256d elMatDiag_1 = q_acc_1_1;
+                const __m256d elMatDiag_2 = q_acc_2_2;
+                const __m256d elMatDiag_3 = q_acc_3_3;
+                const __m256d elMatDiag_4 = q_acc_4_4;
+                const __m256d elMatDiag_5 = q_acc_5_5;
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_4 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_qloop_7 = p_affine_0_0 - p_affine_2_0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_0 = mu_dof_0*2.0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*2.0*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_GRAY[6*q + 5];
+                   const real_t tmp_qloop_5 = p_affine_0_1 - tmp_qloop_3*_data_q_p_0[q] - tmp_qloop_4*_data_q_p_1[q];
+                   const real_t tmp_qloop_8 = p_affine_0_0 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                   const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                   const real_t tmp_qloop_10 = (tmp_qloop_5*tmp_qloop_5);
+                   const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9;
+                   const real_t tmp_qloop_18 = pow(tmp_qloop_11, -0.50000000000000000)*tmp_qloop_17*1.0;
+                   const real_t tmp_qloop_19 = tmp_qloop_18*tmp_qloop_5;
+                   const real_t tmp_qloop_20 = pow(tmp_qloop_11, -1.5000000000000000);
+                   const real_t tmp_qloop_21 = radRayVertex + tmp_qloop_17*(tmp_qloop_14*(-rayVertex_0 + tmp_qloop_8) - tmp_qloop_2*(-rayVertex_1 + tmp_qloop_5));
+                   const real_t tmp_qloop_22 = -tmp_qloop_19*tmp_qloop_2 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_9*1.0;
+                   const real_t tmp_qloop_23 = tmp_qloop_18*tmp_qloop_8;
+                   const real_t tmp_qloop_24 = tmp_qloop_20*tmp_qloop_21*1.0;
+                   const real_t tmp_qloop_25 = tmp_qloop_10*tmp_qloop_24 + tmp_qloop_14*tmp_qloop_23;
+                   const real_t tmp_qloop_26 = tmp_qloop_2*tmp_qloop_23 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_5*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_27 = tmp_qloop_14*tmp_qloop_19 - tmp_qloop_24*tmp_qloop_5*tmp_qloop_8;
+                   const real_t tmp_qloop_28 = 1.0 / (tmp_qloop_22*tmp_qloop_25 + tmp_qloop_26*tmp_qloop_27);
+                   const real_t tmp_qloop_29 = tmp_qloop_28*1.0;
+                   const real_t tmp_qloop_30 = tmp_qloop_22*tmp_qloop_29;
+                   const real_t tmp_qloop_31 = -tmp_qloop_27;
+                   const real_t tmp_qloop_32 = tmp_qloop_29*tmp_qloop_31;
+                   const real_t tmp_qloop_33 = tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q]);
+                   const real_t tmp_qloop_34 = tmp_qloop_25*tmp_qloop_29;
+                   const real_t tmp_qloop_35 = tmp_qloop_26*tmp_qloop_29;
+                   const real_t tmp_qloop_36 = tmp_qloop_28*0.5;
+                   const real_t tmp_qloop_37 = tmp_qloop_22*tmp_qloop_36;
+                   const real_t tmp_qloop_38 = tmp_qloop_31*tmp_qloop_36;
+                   const real_t tmp_qloop_39 = tmp_qloop_25*tmp_qloop_36;
+                   const real_t tmp_qloop_40 = tmp_qloop_26*tmp_qloop_36;
+                   const real_t tmp_qloop_41 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q] + tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q];
+                   const real_t tmp_qloop_42 = tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q];
+                   const real_t tmp_qloop_43 = tmp_qloop_0*2.0;
+                   const real_t tmp_qloop_44 = -p_affine_0_0 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                   const real_t tmp_qloop_45 = (tmp_qloop_44*tmp_qloop_44);
+                   const real_t tmp_qloop_46 = -p_affine_0_1 + tmp_qloop_3*_data_q_p_0[q] + tmp_qloop_4*_data_q_p_1[q];
+                   const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                   const real_t tmp_qloop_48 = tmp_qloop_45 + tmp_qloop_47;
+                   const real_t tmp_qloop_50 = pow(tmp_qloop_48, -0.50000000000000000)*tmp_qloop_49*1.0;
+                   const real_t tmp_qloop_51 = tmp_qloop_44*tmp_qloop_50;
+                   const real_t tmp_qloop_52 = tmp_qloop_1*(rayVertex_1 + tmp_qloop_46) - tmp_qloop_13*(rayVertex_0 + tmp_qloop_44);
+                   const real_t tmp_qloop_53 = pow(tmp_qloop_48, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_54 = tmp_qloop_53*(radRayVertex + tmp_qloop_49*tmp_qloop_52);
+                   const real_t tmp_qloop_55 = tmp_qloop_46*tmp_qloop_50;
+                   const real_t tmp_qloop_56 = tmp_qloop_53*(radRayVertex + tmp_qloop_49*tmp_qloop_52);
+                   const real_t tmp_qloop_57 = tmp_qloop_44*tmp_qloop_46;
+                   const real_t tmp_qloop_58 = abs_det_jac_affine_GRAY*abs((tmp_qloop_1*tmp_qloop_51 - tmp_qloop_56*tmp_qloop_57)*(tmp_qloop_13*tmp_qloop_55 + tmp_qloop_54*tmp_qloop_57) - (tmp_qloop_1*tmp_qloop_55 + tmp_qloop_45*tmp_qloop_56)*(tmp_qloop_13*tmp_qloop_51 - tmp_qloop_47*tmp_qloop_54))*_data_q_w[q];
+                   const real_t q_tmp_0_0 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_42));
+                   const real_t q_tmp_1_1 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_42));
+                   const real_t q_tmp_2_2 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_42));
+                   const real_t q_tmp_3_3 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_42));
+                   const real_t q_tmp_4_4 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_42));
+                   const real_t q_tmp_5_5 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_42));
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatDiag_0 = q_acc_0_0;
+                const real_t elMatDiag_1 = q_acc_1_1;
+                const real_t elMatDiag_2 = q_acc_2_2;
+                const real_t elMatDiag_3 = q_acc_3_3;
+                const real_t elMatDiag_4 = q_acc_4_4;
+                const real_t elMatDiag_5 = q_acc_5_5;
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q])),_mm256_mul_pd(_mm256_mul_pd(mu_dof_1,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_2,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_3,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_4,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4]))),_mm256_mul_pd(_mm256_mul_pd(mu_dof_5,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5])));
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_8 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_5,tmp_qloop_5);
+                   const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9);
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_11)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17));
+                   const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_5);
+                   const __m256d tmp_qloop_20 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_11),_mm256_mul_pd(tmp_qloop_11,tmp_qloop_11));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_8),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_5),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_21),tmp_qloop_9),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_8);
+                   const __m256d tmp_qloop_24 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_21),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_24));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_21),tmp_qloop_5),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,tmp_qloop_5),tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_28 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_25),_mm256_mul_pd(tmp_qloop_26,tmp_qloop_27)));
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_29);
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_31);
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q])),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q]))));
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_29);
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_29);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_36);
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36);
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_36);
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_36);
+                   const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q])));
+                   const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q])));
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0));
+                   const __m256d tmp_qloop_44 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_44,tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_46);
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(tmp_qloop_45,tmp_qloop_47);
+                   const __m256d tmp_qloop_50 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_48)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_49,tmp_qloop_49,tmp_qloop_49,tmp_qloop_49));
+                   const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_44,tmp_qloop_50);
+                   const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_46),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_44),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)));
+                   const __m256d tmp_qloop_53 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_48),_mm256_mul_pd(tmp_qloop_48,tmp_qloop_48)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_set_pd(tmp_qloop_49,tmp_qloop_49,tmp_qloop_49,tmp_qloop_49)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_50);
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_set_pd(tmp_qloop_49,tmp_qloop_49,tmp_qloop_49,tmp_qloop_49)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_44,tmp_qloop_46);
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q]),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_51,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_56,tmp_qloop_57),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_57))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_56)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_51,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_47,tmp_qloop_54),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BL
UE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q]))),tmp_qloop_42))),tmp_qloop_33));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 
1])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1]))),tmp_qloop_42))),tmp_qloop_33));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 
2])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2]))),tmp_qloop_42))),tmp_qloop_33));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 
3])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3]))),tmp_qloop_42))),tmp_qloop_33));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 
4])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4]))),tmp_qloop_42))),tmp_qloop_33));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_58,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5])))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5])))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5]))),tmp_qloop_41)),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 
5])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5]))),tmp_qloop_42))),tmp_qloop_33));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatDiag_0 = q_acc_0_0;
+                const __m256d elMatDiag_1 = q_acc_1_1;
+                const __m256d elMatDiag_2 = q_acc_2_2;
+                const __m256d elMatDiag_3 = q_acc_3_3;
+                const __m256d elMatDiag_4 = q_acc_4_4;
+                const __m256d elMatDiag_5 = q_acc_5_5;
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_4 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_qloop_7 = p_affine_0_0 - p_affine_2_0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_0 = mu_dof_0*2.0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*2.0*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_BLUE[6*q + 5];
+                   const real_t tmp_qloop_5 = p_affine_0_1 - tmp_qloop_3*_data_q_p_0[q] - tmp_qloop_4*_data_q_p_1[q];
+                   const real_t tmp_qloop_8 = p_affine_0_0 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                   const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                   const real_t tmp_qloop_10 = (tmp_qloop_5*tmp_qloop_5);
+                   const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9;
+                   const real_t tmp_qloop_18 = pow(tmp_qloop_11, -0.50000000000000000)*tmp_qloop_17*1.0;
+                   const real_t tmp_qloop_19 = tmp_qloop_18*tmp_qloop_5;
+                   const real_t tmp_qloop_20 = pow(tmp_qloop_11, -1.5000000000000000);
+                   const real_t tmp_qloop_21 = radRayVertex + tmp_qloop_17*(tmp_qloop_14*(-rayVertex_0 + tmp_qloop_8) - tmp_qloop_2*(-rayVertex_1 + tmp_qloop_5));
+                   const real_t tmp_qloop_22 = -tmp_qloop_19*tmp_qloop_2 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_9*1.0;
+                   const real_t tmp_qloop_23 = tmp_qloop_18*tmp_qloop_8;
+                   const real_t tmp_qloop_24 = tmp_qloop_20*tmp_qloop_21*1.0;
+                   const real_t tmp_qloop_25 = tmp_qloop_10*tmp_qloop_24 + tmp_qloop_14*tmp_qloop_23;
+                   const real_t tmp_qloop_26 = tmp_qloop_2*tmp_qloop_23 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_5*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_27 = tmp_qloop_14*tmp_qloop_19 - tmp_qloop_24*tmp_qloop_5*tmp_qloop_8;
+                   const real_t tmp_qloop_28 = 1.0 / (tmp_qloop_22*tmp_qloop_25 + tmp_qloop_26*tmp_qloop_27);
+                   const real_t tmp_qloop_29 = tmp_qloop_28*1.0;
+                   const real_t tmp_qloop_30 = tmp_qloop_22*tmp_qloop_29;
+                   const real_t tmp_qloop_31 = -tmp_qloop_27;
+                   const real_t tmp_qloop_32 = tmp_qloop_29*tmp_qloop_31;
+                   const real_t tmp_qloop_33 = tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q]);
+                   const real_t tmp_qloop_34 = tmp_qloop_25*tmp_qloop_29;
+                   const real_t tmp_qloop_35 = tmp_qloop_26*tmp_qloop_29;
+                   const real_t tmp_qloop_36 = tmp_qloop_28*0.5;
+                   const real_t tmp_qloop_37 = tmp_qloop_22*tmp_qloop_36;
+                   const real_t tmp_qloop_38 = tmp_qloop_31*tmp_qloop_36;
+                   const real_t tmp_qloop_39 = tmp_qloop_25*tmp_qloop_36;
+                   const real_t tmp_qloop_40 = tmp_qloop_26*tmp_qloop_36;
+                   const real_t tmp_qloop_41 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q] + tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q];
+                   const real_t tmp_qloop_42 = tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q];
+                   const real_t tmp_qloop_43 = tmp_qloop_0*2.0;
+                   const real_t tmp_qloop_44 = -p_affine_0_0 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                   const real_t tmp_qloop_45 = (tmp_qloop_44*tmp_qloop_44);
+                   const real_t tmp_qloop_46 = -p_affine_0_1 + tmp_qloop_3*_data_q_p_0[q] + tmp_qloop_4*_data_q_p_1[q];
+                   const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                   const real_t tmp_qloop_48 = tmp_qloop_45 + tmp_qloop_47;
+                   const real_t tmp_qloop_50 = pow(tmp_qloop_48, -0.50000000000000000)*tmp_qloop_49*1.0;
+                   const real_t tmp_qloop_51 = tmp_qloop_44*tmp_qloop_50;
+                   const real_t tmp_qloop_52 = tmp_qloop_1*(rayVertex_1 + tmp_qloop_46) - tmp_qloop_13*(rayVertex_0 + tmp_qloop_44);
+                   const real_t tmp_qloop_53 = pow(tmp_qloop_48, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_54 = tmp_qloop_53*(radRayVertex + tmp_qloop_49*tmp_qloop_52);
+                   const real_t tmp_qloop_55 = tmp_qloop_46*tmp_qloop_50;
+                   const real_t tmp_qloop_56 = tmp_qloop_53*(radRayVertex + tmp_qloop_49*tmp_qloop_52);
+                   const real_t tmp_qloop_57 = tmp_qloop_44*tmp_qloop_46;
+                   const real_t tmp_qloop_58 = abs_det_jac_affine_BLUE*abs((tmp_qloop_1*tmp_qloop_51 - tmp_qloop_56*tmp_qloop_57)*(tmp_qloop_13*tmp_qloop_55 + tmp_qloop_54*tmp_qloop_57) - (tmp_qloop_1*tmp_qloop_55 + tmp_qloop_45*tmp_qloop_56)*(tmp_qloop_13*tmp_qloop_51 - tmp_qloop_47*tmp_qloop_54))*_data_q_w[q];
+                   const real_t q_tmp_0_0 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_42));
+                   const real_t q_tmp_1_1 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_42));
+                   const real_t q_tmp_2_2 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_42));
+                   const real_t q_tmp_3_3 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_42));
+                   const real_t q_tmp_4_4 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_42));
+                   const real_t q_tmp_5_5 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_42));
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatDiag_0 = q_acc_0_0;
+                const real_t elMatDiag_1 = q_acc_1_1;
+                const real_t elMatDiag_2 = q_acc_2_2;
+                const real_t elMatDiag_3 = q_acc_3_3;
+                const real_t elMatDiag_4 = q_acc_4_4;
+                const real_t elMatDiag_5 = q_acc_5_5;
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4bb2bcadbb07413bfe65254ec7af886c73203689
--- /dev/null
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp
@@ -0,0 +1,531 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseEpsilonAnnulusMap_0_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseEpsilonAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (tmp_qloop_1*tmp_qloop_14 - tmp_qloop_11*tmp_qloop_13);
+       const real_t tmp_qloop_51 = tmp_qloop_15*1.0 / (-tmp_qloop_0*tmp_qloop_14 + tmp_qloop_11*tmp_qloop_12);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_27*1.0;
+                const real_t tmp_qloop_29 = tmp_qloop_21*tmp_qloop_28;
+                const real_t tmp_qloop_30 = -tmp_qloop_26;
+                const real_t tmp_qloop_31 = tmp_qloop_28*tmp_qloop_30;
+                const real_t tmp_qloop_32 = mu_dof_0*2.0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*2.0*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_33 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q]);
+                const real_t tmp_qloop_34 = tmp_qloop_24*tmp_qloop_28;
+                const real_t tmp_qloop_35 = tmp_qloop_25*tmp_qloop_28;
+                const real_t tmp_qloop_36 = tmp_qloop_32*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q]);
+                const real_t tmp_qloop_37 = tmp_qloop_27*0.5;
+                const real_t tmp_qloop_38 = tmp_qloop_24*tmp_qloop_37;
+                const real_t tmp_qloop_39 = tmp_qloop_25*tmp_qloop_37;
+                const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_37;
+                const real_t tmp_qloop_41 = tmp_qloop_30*tmp_qloop_37;
+                const real_t tmp_qloop_42 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q];
+                const real_t tmp_qloop_43 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q];
+                const real_t tmp_qloop_44 = tmp_qloop_32*2.0;
+                const real_t tmp_qloop_45 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_43);
+                const real_t tmp_qloop_46 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                const real_t tmp_qloop_48 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                const real_t tmp_qloop_50 = tmp_qloop_47 + tmp_qloop_49;
+                const real_t tmp_qloop_52 = pow(tmp_qloop_50, -0.50000000000000000)*tmp_qloop_51*1.0;
+                const real_t tmp_qloop_53 = tmp_qloop_46*tmp_qloop_52;
+                const real_t tmp_qloop_54 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_48) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_46);
+                const real_t tmp_qloop_55 = pow(tmp_qloop_50, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_56 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_57 = tmp_qloop_48*tmp_qloop_52;
+                const real_t tmp_qloop_58 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_59 = tmp_qloop_46*tmp_qloop_48;
+                const real_t tmp_qloop_60 = abs_det_jac_affine_GRAY*abs((tmp_qloop_0*tmp_qloop_53 - tmp_qloop_58*tmp_qloop_59)*(tmp_qloop_12*tmp_qloop_57 + tmp_qloop_56*tmp_qloop_59) - (tmp_qloop_0*tmp_qloop_57 + tmp_qloop_47*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_53 - tmp_qloop_49*tmp_qloop_56))*_data_q_w[q];
+                const real_t tmp_qloop_61 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1];
+                const real_t tmp_qloop_62 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_42;
+                const real_t tmp_qloop_63 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2];
+                const real_t tmp_qloop_64 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_42;
+                const real_t tmp_qloop_65 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3];
+                const real_t tmp_qloop_66 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_42;
+                const real_t tmp_qloop_67 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4];
+                const real_t tmp_qloop_68 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_42;
+                const real_t tmp_qloop_69 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_70 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_42;
+                const real_t tmp_qloop_71 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1]);
+                const real_t tmp_qloop_72 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_43);
+                const real_t tmp_qloop_73 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2]);
+                const real_t tmp_qloop_74 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_43);
+                const real_t tmp_qloop_75 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3]);
+                const real_t tmp_qloop_76 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_43);
+                const real_t tmp_qloop_77 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4]);
+                const real_t tmp_qloop_78 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_43);
+                const real_t q_tmp_0_0 = tmp_qloop_60*(tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q]) + tmp_qloop_36 + tmp_qloop_45*(tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_42));
+                const real_t q_tmp_0_1 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_61 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_62);
+                const real_t q_tmp_0_2 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_63 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_64);
+                const real_t q_tmp_0_3 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_65 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_66);
+                const real_t q_tmp_0_4 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_67 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_68);
+                const real_t q_tmp_0_5 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_69 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_70);
+                const real_t q_tmp_1_1 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_61*tmp_qloop_71 + tmp_qloop_62*tmp_qloop_72);
+                const real_t q_tmp_1_2 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_63*tmp_qloop_71 + tmp_qloop_64*tmp_qloop_72);
+                const real_t q_tmp_1_3 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_65*tmp_qloop_71 + tmp_qloop_66*tmp_qloop_72);
+                const real_t q_tmp_1_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_71 + tmp_qloop_68*tmp_qloop_72);
+                const real_t q_tmp_1_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_71 + tmp_qloop_70*tmp_qloop_72);
+                const real_t q_tmp_2_2 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_63*tmp_qloop_73 + tmp_qloop_64*tmp_qloop_74);
+                const real_t q_tmp_2_3 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_65*tmp_qloop_73 + tmp_qloop_66*tmp_qloop_74);
+                const real_t q_tmp_2_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_73 + tmp_qloop_68*tmp_qloop_74);
+                const real_t q_tmp_2_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_73 + tmp_qloop_70*tmp_qloop_74);
+                const real_t q_tmp_3_3 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_66*tmp_qloop_76);
+                const real_t q_tmp_3_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_68*tmp_qloop_76);
+                const real_t q_tmp_3_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_70*tmp_qloop_76);
+                const real_t q_tmp_4_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_68*tmp_qloop_78);
+                const real_t q_tmp_4_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_70*tmp_qloop_78);
+                const real_t q_tmp_5_5 = tmp_qloop_60*(tmp_qloop_32*tmp_qloop_69*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5]) + tmp_qloop_36 + tmp_qloop_44*tmp_qloop_70*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_43));
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_0_1*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_0_2*src_dof_0 + q_acc_1_2*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_0_3*src_dof_0 + q_acc_1_3*src_dof_1 + q_acc_2_3*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_0_4*src_dof_0 + q_acc_1_4*src_dof_1 + q_acc_2_4*src_dof_2 + q_acc_3_4*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_0_5*src_dof_0 + q_acc_1_5*src_dof_1 + q_acc_2_5*src_dof_2 + q_acc_3_5*src_dof_3 + q_acc_4_5*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // BLUE affine geometry setup: reciprocal of micro_edges_per_macro_edge_float, used to scale the macro-vertex differences below
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); // component 0 of macro vertex 0 shifted by the scaled (vertex 1 - vertex 0) difference
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); // same shift, component 1
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); // scaled (vertex 2 - vertex 0) difference, component 0
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); // scaled (vertex 2 - vertex 0) difference, component 1
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; // point 0 = vertex 0 + scaled (vertex 1 - vertex 0), component 0
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; // point 0, component 1
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; // point 1 = vertex 0 + scaled (vertex 2 - vertex 0), component 0
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; // point 1, component 1
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; // point 2 = point 0 + scaled (vertex 2 - vertex 0), component 0
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; // point 2, component 1
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; // 2x2 Jacobian: first column is (point 1 - point 0)
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; // second column is (point 2 - point 0)
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; // first column, component 1
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; // second column, component 1
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; // determinant of the 2x2 Jacobian
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); // NOTE(review): no guard against a zero determinant; presumably the macro triangle is never degenerate - confirm
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; // inverse Jacobian via adjugate divided by determinant
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; // off-diagonal entries are negated
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; // diagonal entries are swapped
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); // NOTE(review): unqualified abs on a real_t - confirm it resolves to the floating-point overload rather than the integer abs
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_27*1.0;
+                const real_t tmp_qloop_29 = tmp_qloop_21*tmp_qloop_28;
+                const real_t tmp_qloop_30 = -tmp_qloop_26;
+                const real_t tmp_qloop_31 = tmp_qloop_28*tmp_qloop_30;
+                const real_t tmp_qloop_32 = mu_dof_0*2.0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*2.0*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_33 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q]);
+                const real_t tmp_qloop_34 = tmp_qloop_24*tmp_qloop_28;
+                const real_t tmp_qloop_35 = tmp_qloop_25*tmp_qloop_28;
+                const real_t tmp_qloop_36 = tmp_qloop_32*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q]);
+                const real_t tmp_qloop_37 = tmp_qloop_27*0.5;
+                const real_t tmp_qloop_38 = tmp_qloop_24*tmp_qloop_37;
+                const real_t tmp_qloop_39 = tmp_qloop_25*tmp_qloop_37;
+                const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_37;
+                const real_t tmp_qloop_41 = tmp_qloop_30*tmp_qloop_37;
+                const real_t tmp_qloop_42 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q];
+                const real_t tmp_qloop_43 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q];
+                const real_t tmp_qloop_44 = tmp_qloop_32*2.0;
+                const real_t tmp_qloop_45 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_43);
+                const real_t tmp_qloop_46 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                const real_t tmp_qloop_48 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                const real_t tmp_qloop_50 = tmp_qloop_47 + tmp_qloop_49;
+                const real_t tmp_qloop_52 = pow(tmp_qloop_50, -0.50000000000000000)*tmp_qloop_51*1.0;
+                const real_t tmp_qloop_53 = tmp_qloop_46*tmp_qloop_52;
+                const real_t tmp_qloop_54 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_48) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_46);
+                const real_t tmp_qloop_55 = pow(tmp_qloop_50, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_56 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_57 = tmp_qloop_48*tmp_qloop_52;
+                const real_t tmp_qloop_58 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_59 = tmp_qloop_46*tmp_qloop_48;
+                const real_t tmp_qloop_60 = abs_det_jac_affine_BLUE*abs((tmp_qloop_0*tmp_qloop_53 - tmp_qloop_58*tmp_qloop_59)*(tmp_qloop_12*tmp_qloop_57 + tmp_qloop_56*tmp_qloop_59) - (tmp_qloop_0*tmp_qloop_57 + tmp_qloop_47*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_53 - tmp_qloop_49*tmp_qloop_56))*_data_q_w[q];
+                const real_t tmp_qloop_61 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1];
+                const real_t tmp_qloop_62 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_42;
+                const real_t tmp_qloop_63 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2];
+                const real_t tmp_qloop_64 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_42;
+                const real_t tmp_qloop_65 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3];
+                const real_t tmp_qloop_66 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_42;
+                const real_t tmp_qloop_67 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4];
+                const real_t tmp_qloop_68 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_42;
+                const real_t tmp_qloop_69 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_70 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_42;
+                const real_t tmp_qloop_71 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1]);
+                const real_t tmp_qloop_72 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_43);
+                const real_t tmp_qloop_73 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2]);
+                const real_t tmp_qloop_74 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_43);
+                const real_t tmp_qloop_75 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3]);
+                const real_t tmp_qloop_76 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_43);
+                const real_t tmp_qloop_77 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4]);
+                const real_t tmp_qloop_78 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_43);
+                const real_t q_tmp_0_0 = tmp_qloop_60*(tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q]) + tmp_qloop_36 + tmp_qloop_45*(tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_42));
+                const real_t q_tmp_0_1 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_61 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_62);
+                const real_t q_tmp_0_2 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_63 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_64);
+                const real_t q_tmp_0_3 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_65 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_66);
+                const real_t q_tmp_0_4 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_67 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_68);
+                const real_t q_tmp_0_5 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_69 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_70);
+                const real_t q_tmp_1_1 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_61*tmp_qloop_71 + tmp_qloop_62*tmp_qloop_72);
+                const real_t q_tmp_1_2 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_63*tmp_qloop_71 + tmp_qloop_64*tmp_qloop_72);
+                const real_t q_tmp_1_3 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_65*tmp_qloop_71 + tmp_qloop_66*tmp_qloop_72);
+                const real_t q_tmp_1_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_71 + tmp_qloop_68*tmp_qloop_72);
+                const real_t q_tmp_1_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_71 + tmp_qloop_70*tmp_qloop_72);
+                const real_t q_tmp_2_2 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_63*tmp_qloop_73 + tmp_qloop_64*tmp_qloop_74);
+                const real_t q_tmp_2_3 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_65*tmp_qloop_73 + tmp_qloop_66*tmp_qloop_74);
+                const real_t q_tmp_2_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_73 + tmp_qloop_68*tmp_qloop_74);
+                const real_t q_tmp_2_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_73 + tmp_qloop_70*tmp_qloop_74);
+                const real_t q_tmp_3_3 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_66*tmp_qloop_76);
+                const real_t q_tmp_3_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_68*tmp_qloop_76);
+                const real_t q_tmp_3_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_70*tmp_qloop_76);
+                const real_t q_tmp_4_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_68*tmp_qloop_78);
+                const real_t q_tmp_4_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_70*tmp_qloop_78);
+                const real_t q_tmp_5_5 = tmp_qloop_60*(tmp_qloop_32*tmp_qloop_69*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5]) + tmp_qloop_36 + tmp_qloop_44*tmp_qloop_70*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_43));
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_0_1*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_0_2*src_dof_0 + q_acc_1_2*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_0_3*src_dof_0 + q_acc_1_3*src_dof_1 + q_acc_2_3*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_0_4*src_dof_0 + q_acc_1_4*src_dof_1 + q_acc_2_4*src_dof_2 + q_acc_3_4*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_0_5*src_dof_0 + q_acc_1_5*src_dof_1 + q_acc_2_5*src_dof_2 + q_acc_3_5*src_dof_3 + q_acc_4_5*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4dd48d0732be7e342c117ad8e40dab15a471317a
--- /dev/null
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
@@ -0,0 +1,389 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseEpsilonAnnulusMap_0_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseEpsilonAnnulusMap_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_2 = -tmp_qloop_1;
+       const real_t tmp_qloop_12 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_13 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_14 = -tmp_qloop_13;
+       const real_t tmp_qloop_15 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_16 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_17 = -tmp_qloop_16*1.0 / (-tmp_qloop_12*tmp_qloop_14 + tmp_qloop_15*tmp_qloop_2);
+       const real_t tmp_qloop_49 = tmp_qloop_16*1.0 / (-tmp_qloop_1*tmp_qloop_15 + tmp_qloop_12*tmp_qloop_13);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_4 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_7 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = mu_dof_0*2.0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*2.0*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_5 = p_affine_0_1 - tmp_qloop_3*_data_q_p_0[q] - tmp_qloop_4*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = p_affine_0_0 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                const real_t tmp_qloop_10 = (tmp_qloop_5*tmp_qloop_5);
+                const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9;
+                const real_t tmp_qloop_18 = pow(tmp_qloop_11, -0.50000000000000000)*tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_18*tmp_qloop_5;
+                const real_t tmp_qloop_20 = pow(tmp_qloop_11, -1.5000000000000000);
+                const real_t tmp_qloop_21 = radRayVertex + tmp_qloop_17*(tmp_qloop_14*(-rayVertex_0 + tmp_qloop_8) - tmp_qloop_2*(-rayVertex_1 + tmp_qloop_5));
+                const real_t tmp_qloop_22 = -tmp_qloop_19*tmp_qloop_2 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_9*1.0;
+                const real_t tmp_qloop_23 = tmp_qloop_18*tmp_qloop_8;
+                const real_t tmp_qloop_24 = tmp_qloop_20*tmp_qloop_21*1.0;
+                const real_t tmp_qloop_25 = tmp_qloop_10*tmp_qloop_24 + tmp_qloop_14*tmp_qloop_23;
+                const real_t tmp_qloop_26 = tmp_qloop_2*tmp_qloop_23 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_5*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_27 = tmp_qloop_14*tmp_qloop_19 - tmp_qloop_24*tmp_qloop_5*tmp_qloop_8;
+                const real_t tmp_qloop_28 = 1.0 / (tmp_qloop_22*tmp_qloop_25 + tmp_qloop_26*tmp_qloop_27);
+                const real_t tmp_qloop_29 = tmp_qloop_28*1.0;
+                const real_t tmp_qloop_30 = tmp_qloop_22*tmp_qloop_29;
+                const real_t tmp_qloop_31 = -tmp_qloop_27;
+                const real_t tmp_qloop_32 = tmp_qloop_29*tmp_qloop_31;
+                const real_t tmp_qloop_33 = tmp_qloop_25*tmp_qloop_29;
+                const real_t tmp_qloop_34 = tmp_qloop_26*tmp_qloop_29;
+                const real_t tmp_qloop_35 = tmp_qloop_0*(tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q] + tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q])*(tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q] + tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q]);
+                const real_t tmp_qloop_36 = tmp_qloop_28*0.5;
+                const real_t tmp_qloop_37 = tmp_qloop_25*tmp_qloop_36;
+                const real_t tmp_qloop_38 = tmp_qloop_26*tmp_qloop_36;
+                const real_t tmp_qloop_39 = tmp_qloop_22*tmp_qloop_36;
+                const real_t tmp_qloop_40 = tmp_qloop_31*tmp_qloop_36;
+                const real_t tmp_qloop_41 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q] + tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q];
+                const real_t tmp_qloop_42 = tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q];
+                const real_t tmp_qloop_43 = tmp_qloop_0*2.0;
+                const real_t tmp_qloop_44 = -p_affine_0_0 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_45 = (tmp_qloop_44*tmp_qloop_44);
+                const real_t tmp_qloop_46 = -p_affine_0_1 + tmp_qloop_3*_data_q_p_0[q] + tmp_qloop_4*_data_q_p_1[q];
+                const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                const real_t tmp_qloop_48 = tmp_qloop_45 + tmp_qloop_47;
+                const real_t tmp_qloop_50 = pow(tmp_qloop_48, -0.50000000000000000)*tmp_qloop_49*1.0;
+                const real_t tmp_qloop_51 = tmp_qloop_44*tmp_qloop_50;
+                const real_t tmp_qloop_52 = tmp_qloop_1*(rayVertex_1 + tmp_qloop_46) - tmp_qloop_13*(rayVertex_0 + tmp_qloop_44);
+                const real_t tmp_qloop_53 = pow(tmp_qloop_48, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_54 = tmp_qloop_53*(radRayVertex + tmp_qloop_49*tmp_qloop_52);
+                const real_t tmp_qloop_55 = tmp_qloop_46*tmp_qloop_50;
+                const real_t tmp_qloop_56 = tmp_qloop_53*(radRayVertex + tmp_qloop_49*tmp_qloop_52);
+                const real_t tmp_qloop_57 = tmp_qloop_44*tmp_qloop_46;
+                const real_t tmp_qloop_58 = abs_det_jac_affine_GRAY*abs((tmp_qloop_1*tmp_qloop_51 - tmp_qloop_56*tmp_qloop_57)*(tmp_qloop_13*tmp_qloop_55 + tmp_qloop_54*tmp_qloop_57) - (tmp_qloop_1*tmp_qloop_55 + tmp_qloop_45*tmp_qloop_56)*(tmp_qloop_13*tmp_qloop_51 - tmp_qloop_47*tmp_qloop_54))*_data_q_w[q];
+                const real_t q_tmp_0_0 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_42));
+                const real_t q_tmp_1_1 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_42));
+                const real_t q_tmp_2_2 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_42));
+                const real_t q_tmp_3_3 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_42));
+                const real_t q_tmp_4_4 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_42));
+                const real_t q_tmp_5_5 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_42));
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatDiag_0 = q_acc_0_0;
+             const real_t elMatDiag_1 = q_acc_1_1;
+             const real_t elMatDiag_2 = q_acc_2_2;
+             const real_t elMatDiag_3 = q_acc_3_3;
+             const real_t elMatDiag_4 = q_acc_4_4;
+             const real_t elMatDiag_5 = q_acc_5_5;
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_4 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_7 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = mu_dof_0*2.0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*2.0*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_5 = p_affine_0_1 - tmp_qloop_3*_data_q_p_0[q] - tmp_qloop_4*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = p_affine_0_0 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                const real_t tmp_qloop_10 = (tmp_qloop_5*tmp_qloop_5);
+                const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9;
+                const real_t tmp_qloop_18 = pow(tmp_qloop_11, -0.50000000000000000)*tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_18*tmp_qloop_5;
+                const real_t tmp_qloop_20 = pow(tmp_qloop_11, -1.5000000000000000);
+                const real_t tmp_qloop_21 = radRayVertex + tmp_qloop_17*(tmp_qloop_14*(-rayVertex_0 + tmp_qloop_8) - tmp_qloop_2*(-rayVertex_1 + tmp_qloop_5));
+                const real_t tmp_qloop_22 = -tmp_qloop_19*tmp_qloop_2 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_9*1.0;
+                const real_t tmp_qloop_23 = tmp_qloop_18*tmp_qloop_8;
+                const real_t tmp_qloop_24 = tmp_qloop_20*tmp_qloop_21*1.0;
+                const real_t tmp_qloop_25 = tmp_qloop_10*tmp_qloop_24 + tmp_qloop_14*tmp_qloop_23;
+                const real_t tmp_qloop_26 = tmp_qloop_2*tmp_qloop_23 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_5*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_27 = tmp_qloop_14*tmp_qloop_19 - tmp_qloop_24*tmp_qloop_5*tmp_qloop_8;
+                const real_t tmp_qloop_28 = 1.0 / (tmp_qloop_22*tmp_qloop_25 + tmp_qloop_26*tmp_qloop_27);
+                const real_t tmp_qloop_29 = tmp_qloop_28*1.0;
+                const real_t tmp_qloop_30 = tmp_qloop_22*tmp_qloop_29;
+                const real_t tmp_qloop_31 = -tmp_qloop_27;
+                const real_t tmp_qloop_32 = tmp_qloop_29*tmp_qloop_31;
+                const real_t tmp_qloop_33 = tmp_qloop_25*tmp_qloop_29;
+                const real_t tmp_qloop_34 = tmp_qloop_26*tmp_qloop_29;
+                const real_t tmp_qloop_35 = tmp_qloop_0*(tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q] + tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q])*(tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q] + tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q]);
+                const real_t tmp_qloop_36 = tmp_qloop_28*0.5;
+                const real_t tmp_qloop_37 = tmp_qloop_25*tmp_qloop_36;
+                const real_t tmp_qloop_38 = tmp_qloop_26*tmp_qloop_36;
+                const real_t tmp_qloop_39 = tmp_qloop_22*tmp_qloop_36;
+                const real_t tmp_qloop_40 = tmp_qloop_31*tmp_qloop_36;
+                const real_t tmp_qloop_41 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q] + tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q];
+                const real_t tmp_qloop_42 = tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q];
+                const real_t tmp_qloop_43 = tmp_qloop_0*2.0;
+                const real_t tmp_qloop_44 = -p_affine_0_0 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_45 = (tmp_qloop_44*tmp_qloop_44);
+                const real_t tmp_qloop_46 = -p_affine_0_1 + tmp_qloop_3*_data_q_p_0[q] + tmp_qloop_4*_data_q_p_1[q];
+                const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                const real_t tmp_qloop_48 = tmp_qloop_45 + tmp_qloop_47;
+                const real_t tmp_qloop_50 = pow(tmp_qloop_48, -0.50000000000000000)*tmp_qloop_49*1.0;
+                const real_t tmp_qloop_51 = tmp_qloop_44*tmp_qloop_50;
+                const real_t tmp_qloop_52 = tmp_qloop_1*(rayVertex_1 + tmp_qloop_46) - tmp_qloop_13*(rayVertex_0 + tmp_qloop_44);
+                const real_t tmp_qloop_53 = pow(tmp_qloop_48, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_54 = tmp_qloop_53*(radRayVertex + tmp_qloop_49*tmp_qloop_52);
+                const real_t tmp_qloop_55 = tmp_qloop_46*tmp_qloop_50;
+                const real_t tmp_qloop_56 = tmp_qloop_53*(radRayVertex + tmp_qloop_49*tmp_qloop_52);
+                const real_t tmp_qloop_57 = tmp_qloop_44*tmp_qloop_46;
+                const real_t tmp_qloop_58 = abs_det_jac_affine_BLUE*abs((tmp_qloop_1*tmp_qloop_51 - tmp_qloop_56*tmp_qloop_57)*(tmp_qloop_13*tmp_qloop_55 + tmp_qloop_54*tmp_qloop_57) - (tmp_qloop_1*tmp_qloop_55 + tmp_qloop_45*tmp_qloop_56)*(tmp_qloop_13*tmp_qloop_51 - tmp_qloop_47*tmp_qloop_54))*_data_q_w[q];
+                const real_t q_tmp_0_0 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_42));
+                const real_t q_tmp_1_1 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_42));
+                const real_t q_tmp_2_2 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_42));
+                const real_t q_tmp_3_3 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_42));
+                const real_t q_tmp_4_4 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_42));
+                const real_t q_tmp_5_5 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5]) + tmp_qloop_35 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_42));
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatDiag_0 = q_acc_0_0;
+             const real_t elMatDiag_1 = q_acc_1_1;
+             const real_t elMatDiag_2 = q_acc_2_2;
+             const real_t elMatDiag_3 = q_acc_3_3;
+             const real_t elMatDiag_4 = q_acc_4_4;
+             const real_t elMatDiag_5 = q_acc_5_5;
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..560ce41a5feadae9c35279879339a76db772cef5
--- /dev/null
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_macro_2D.cpp
@@ -0,0 +1,689 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseEpsilonAnnulusMap_0_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseEpsilonAnnulusMap_0_0::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (tmp_qloop_1*tmp_qloop_14 - tmp_qloop_11*tmp_qloop_13);
+       const real_t tmp_qloop_51 = tmp_qloop_15*1.0 / (-tmp_qloop_0*tmp_qloop_14 + tmp_qloop_11*tmp_qloop_12);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_27*1.0;
+                const real_t tmp_qloop_29 = tmp_qloop_21*tmp_qloop_28;
+                const real_t tmp_qloop_30 = -tmp_qloop_26;
+                const real_t tmp_qloop_31 = tmp_qloop_28*tmp_qloop_30;
+                const real_t tmp_qloop_32 = mu_dof_0*2.0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*2.0*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_33 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q]);
+                const real_t tmp_qloop_34 = tmp_qloop_24*tmp_qloop_28;
+                const real_t tmp_qloop_35 = tmp_qloop_25*tmp_qloop_28;
+                const real_t tmp_qloop_36 = tmp_qloop_32*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q]);
+                const real_t tmp_qloop_37 = tmp_qloop_27*0.5;
+                const real_t tmp_qloop_38 = tmp_qloop_24*tmp_qloop_37;
+                const real_t tmp_qloop_39 = tmp_qloop_25*tmp_qloop_37;
+                const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_37;
+                const real_t tmp_qloop_41 = tmp_qloop_30*tmp_qloop_37;
+                const real_t tmp_qloop_42 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q];
+                const real_t tmp_qloop_43 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q];
+                const real_t tmp_qloop_44 = tmp_qloop_32*2.0;
+                const real_t tmp_qloop_45 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_43);
+                const real_t tmp_qloop_46 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                const real_t tmp_qloop_48 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                const real_t tmp_qloop_50 = tmp_qloop_47 + tmp_qloop_49;
+                const real_t tmp_qloop_52 = pow(tmp_qloop_50, -0.50000000000000000)*tmp_qloop_51*1.0;
+                const real_t tmp_qloop_53 = tmp_qloop_46*tmp_qloop_52;
+                const real_t tmp_qloop_54 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_48) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_46);
+                const real_t tmp_qloop_55 = pow(tmp_qloop_50, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_56 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_57 = tmp_qloop_48*tmp_qloop_52;
+                const real_t tmp_qloop_58 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_59 = tmp_qloop_46*tmp_qloop_48;
+                const real_t tmp_qloop_60 = abs_det_jac_affine_GRAY*abs((tmp_qloop_0*tmp_qloop_53 - tmp_qloop_58*tmp_qloop_59)*(tmp_qloop_12*tmp_qloop_57 + tmp_qloop_56*tmp_qloop_59) - (tmp_qloop_0*tmp_qloop_57 + tmp_qloop_47*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_53 - tmp_qloop_49*tmp_qloop_56))*_data_q_w[q];
+                const real_t tmp_qloop_61 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1];
+                const real_t tmp_qloop_62 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_42;
+                const real_t tmp_qloop_63 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2];
+                const real_t tmp_qloop_64 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_42;
+                const real_t tmp_qloop_65 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3];
+                const real_t tmp_qloop_66 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_42;
+                const real_t tmp_qloop_67 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4];
+                const real_t tmp_qloop_68 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_42;
+                const real_t tmp_qloop_69 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_70 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_42;
+                const real_t tmp_qloop_71 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1]);
+                const real_t tmp_qloop_72 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_43);
+                const real_t tmp_qloop_73 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2]);
+                const real_t tmp_qloop_74 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_43);
+                const real_t tmp_qloop_75 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3]);
+                const real_t tmp_qloop_76 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_43);
+                const real_t tmp_qloop_77 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4]);
+                const real_t tmp_qloop_78 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_43);
+                const real_t q_tmp_0_0 = tmp_qloop_60*(tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q]) + tmp_qloop_36 + tmp_qloop_45*(tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_42));
+                const real_t q_tmp_0_1 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_61 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_62);
+                const real_t q_tmp_0_2 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_63 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_64);
+                const real_t q_tmp_0_3 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_65 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_66);
+                const real_t q_tmp_0_4 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_67 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_68);
+                const real_t q_tmp_0_5 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_69 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_70);
+                const real_t q_tmp_1_1 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_61*tmp_qloop_71 + tmp_qloop_62*tmp_qloop_72);
+                const real_t q_tmp_1_2 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_63*tmp_qloop_71 + tmp_qloop_64*tmp_qloop_72);
+                const real_t q_tmp_1_3 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_65*tmp_qloop_71 + tmp_qloop_66*tmp_qloop_72);
+                const real_t q_tmp_1_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_71 + tmp_qloop_68*tmp_qloop_72);
+                const real_t q_tmp_1_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_71 + tmp_qloop_70*tmp_qloop_72);
+                const real_t q_tmp_2_2 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_63*tmp_qloop_73 + tmp_qloop_64*tmp_qloop_74);
+                const real_t q_tmp_2_3 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_65*tmp_qloop_73 + tmp_qloop_66*tmp_qloop_74);
+                const real_t q_tmp_2_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_73 + tmp_qloop_68*tmp_qloop_74);
+                const real_t q_tmp_2_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_73 + tmp_qloop_70*tmp_qloop_74);
+                const real_t q_tmp_3_3 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_66*tmp_qloop_76);
+                const real_t q_tmp_3_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_68*tmp_qloop_76);
+                const real_t q_tmp_3_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_70*tmp_qloop_76);
+                const real_t q_tmp_4_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_68*tmp_qloop_78);
+                const real_t q_tmp_4_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_70*tmp_qloop_78);
+                const real_t q_tmp_5_5 = tmp_qloop_60*(tmp_qloop_32*tmp_qloop_69*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5]) + tmp_qloop_36 + tmp_qloop_44*tmp_qloop_70*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_43));
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_0_1;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_0_2;
+             const real_t elMat_2_1 = q_acc_1_2;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_0_3;
+             const real_t elMat_3_1 = q_acc_1_3;
+             const real_t elMat_3_2 = q_acc_2_3;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_0_4;
+             const real_t elMat_4_1 = q_acc_1_4;
+             const real_t elMat_4_2 = q_acc_2_4;
+             const real_t elMat_4_3 = q_acc_3_4;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_0_5;
+             const real_t elMat_5_1 = q_acc_1_5;
+             const real_t elMat_5_2 = q_acc_2_5;
+             const real_t elMat_5_3 = q_acc_3_5;
+             const real_t elMat_5_4 = q_acc_4_5;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_27*1.0;
+                const real_t tmp_qloop_29 = tmp_qloop_21*tmp_qloop_28;
+                const real_t tmp_qloop_30 = -tmp_qloop_26;
+                const real_t tmp_qloop_31 = tmp_qloop_28*tmp_qloop_30;
+                const real_t tmp_qloop_32 = mu_dof_0*2.0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*2.0*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_33 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q]);
+                const real_t tmp_qloop_34 = tmp_qloop_24*tmp_qloop_28;
+                const real_t tmp_qloop_35 = tmp_qloop_25*tmp_qloop_28;
+                const real_t tmp_qloop_36 = tmp_qloop_32*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q]);
+                const real_t tmp_qloop_37 = tmp_qloop_27*0.5;
+                const real_t tmp_qloop_38 = tmp_qloop_24*tmp_qloop_37;
+                const real_t tmp_qloop_39 = tmp_qloop_25*tmp_qloop_37;
+                const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_37;
+                const real_t tmp_qloop_41 = tmp_qloop_30*tmp_qloop_37;
+                const real_t tmp_qloop_42 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q];
+                const real_t tmp_qloop_43 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q];
+                const real_t tmp_qloop_44 = tmp_qloop_32*2.0;
+                const real_t tmp_qloop_45 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_43);
+                const real_t tmp_qloop_46 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                const real_t tmp_qloop_48 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                const real_t tmp_qloop_50 = tmp_qloop_47 + tmp_qloop_49;
+                const real_t tmp_qloop_52 = pow(tmp_qloop_50, -0.50000000000000000)*tmp_qloop_51*1.0;
+                const real_t tmp_qloop_53 = tmp_qloop_46*tmp_qloop_52;
+                const real_t tmp_qloop_54 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_48) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_46);
+                const real_t tmp_qloop_55 = pow(tmp_qloop_50, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_56 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_57 = tmp_qloop_48*tmp_qloop_52;
+                const real_t tmp_qloop_58 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_59 = tmp_qloop_46*tmp_qloop_48;
+                const real_t tmp_qloop_60 = abs_det_jac_affine_BLUE*abs((tmp_qloop_0*tmp_qloop_53 - tmp_qloop_58*tmp_qloop_59)*(tmp_qloop_12*tmp_qloop_57 + tmp_qloop_56*tmp_qloop_59) - (tmp_qloop_0*tmp_qloop_57 + tmp_qloop_47*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_53 - tmp_qloop_49*tmp_qloop_56))*_data_q_w[q];
+                const real_t tmp_qloop_61 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1];
+                const real_t tmp_qloop_62 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_42;
+                const real_t tmp_qloop_63 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2];
+                const real_t tmp_qloop_64 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_42;
+                const real_t tmp_qloop_65 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3];
+                const real_t tmp_qloop_66 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_42;
+                const real_t tmp_qloop_67 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4];
+                const real_t tmp_qloop_68 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_42;
+                const real_t tmp_qloop_69 = tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_70 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_42;
+                const real_t tmp_qloop_71 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1]);
+                const real_t tmp_qloop_72 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_43);
+                const real_t tmp_qloop_73 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2]);
+                const real_t tmp_qloop_74 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_43);
+                const real_t tmp_qloop_75 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3]);
+                const real_t tmp_qloop_76 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_43);
+                const real_t tmp_qloop_77 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4]);
+                const real_t tmp_qloop_78 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_43);
+                const real_t q_tmp_0_0 = tmp_qloop_60*(tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q]) + tmp_qloop_36 + tmp_qloop_45*(tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_42));
+                const real_t q_tmp_0_1 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_61 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_62);
+                const real_t q_tmp_0_2 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_63 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_64);
+                const real_t q_tmp_0_3 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_65 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_66);
+                const real_t q_tmp_0_4 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_67 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_68);
+                const real_t q_tmp_0_5 = tmp_qloop_60*(tmp_qloop_33*tmp_qloop_69 + tmp_qloop_36 + tmp_qloop_45*tmp_qloop_70);
+                const real_t q_tmp_1_1 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_61*tmp_qloop_71 + tmp_qloop_62*tmp_qloop_72);
+                const real_t q_tmp_1_2 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_63*tmp_qloop_71 + tmp_qloop_64*tmp_qloop_72);
+                const real_t q_tmp_1_3 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_65*tmp_qloop_71 + tmp_qloop_66*tmp_qloop_72);
+                const real_t q_tmp_1_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_71 + tmp_qloop_68*tmp_qloop_72);
+                const real_t q_tmp_1_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_71 + tmp_qloop_70*tmp_qloop_72);
+                const real_t q_tmp_2_2 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_63*tmp_qloop_73 + tmp_qloop_64*tmp_qloop_74);
+                const real_t q_tmp_2_3 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_65*tmp_qloop_73 + tmp_qloop_66*tmp_qloop_74);
+                const real_t q_tmp_2_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_73 + tmp_qloop_68*tmp_qloop_74);
+                const real_t q_tmp_2_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_73 + tmp_qloop_70*tmp_qloop_74);
+                const real_t q_tmp_3_3 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_66*tmp_qloop_76);
+                const real_t q_tmp_3_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_68*tmp_qloop_76);
+                const real_t q_tmp_3_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_70*tmp_qloop_76);
+                const real_t q_tmp_4_4 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_68*tmp_qloop_78);
+                const real_t q_tmp_4_5 = tmp_qloop_60*(tmp_qloop_36 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_70*tmp_qloop_78);
+                const real_t q_tmp_5_5 = tmp_qloop_60*(tmp_qloop_32*tmp_qloop_69*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5]) + tmp_qloop_36 + tmp_qloop_44*tmp_qloop_70*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_43));
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_0_1;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_0_2;
+             const real_t elMat_2_1 = q_acc_1_2;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_0_3;
+             const real_t elMat_3_1 = q_acc_1_3;
+             const real_t elMat_3_2 = q_acc_2_3;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_0_4;
+             const real_t elMat_4_1 = q_acc_1_4;
+             const real_t elMat_4_2 = q_acc_2_4;
+             const real_t elMat_4_3 = q_acc_3_4;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_0_5;
+             const real_t elMat_5_1 = q_acc_1_5;
+             const real_t elMat_5_2 = q_acc_2_5;
+             const real_t elMat_5_3 = q_acc_3_5;
+             const real_t elMat_5_4 = q_acc_4_5;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6a04e417652b926762acdfaff514d07c635fc9c5
--- /dev/null
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp
@@ -0,0 +1,629 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseEpsilonAnnulusMap_0_1.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseEpsilonAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (tmp_qloop_1*tmp_qloop_14 - tmp_qloop_11*tmp_qloop_13);
+       const real_t tmp_qloop_54 = tmp_qloop_15*1.0 / (-tmp_qloop_0*tmp_qloop_14 + tmp_qloop_11*tmp_qloop_12);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_27*1.0;
+                const real_t tmp_qloop_29 = tmp_qloop_21*tmp_qloop_28;
+                const real_t tmp_qloop_30 = -tmp_qloop_26;
+                const real_t tmp_qloop_31 = tmp_qloop_28*tmp_qloop_30;
+                const real_t tmp_qloop_32 = mu_dof_0*2.0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*2.0*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_33 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q]);
+                const real_t tmp_qloop_34 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q]);
+                const real_t tmp_qloop_35 = tmp_qloop_24*tmp_qloop_28;
+                const real_t tmp_qloop_36 = tmp_qloop_25*tmp_qloop_28;
+                const real_t tmp_qloop_37 = tmp_qloop_32*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q]);
+                const real_t tmp_qloop_38 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q]);
+                const real_t tmp_qloop_39 = tmp_qloop_27*0.5;
+                const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39;
+                const real_t tmp_qloop_41 = tmp_qloop_30*tmp_qloop_39;
+                const real_t tmp_qloop_42 = tmp_qloop_24*tmp_qloop_39;
+                const real_t tmp_qloop_43 = tmp_qloop_25*tmp_qloop_39;
+                const real_t tmp_qloop_44 = tmp_qloop_42*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q];
+                const real_t tmp_qloop_45 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_44;
+                const real_t tmp_qloop_46 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q];
+                const real_t tmp_qloop_47 = tmp_qloop_32*2.0;
+                const real_t tmp_qloop_48 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_46);
+                const real_t tmp_qloop_49 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_50 = (tmp_qloop_49*tmp_qloop_49);
+                const real_t tmp_qloop_51 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_52 = (tmp_qloop_51*tmp_qloop_51);
+                const real_t tmp_qloop_53 = tmp_qloop_50 + tmp_qloop_52;
+                const real_t tmp_qloop_55 = pow(tmp_qloop_53, -0.50000000000000000)*tmp_qloop_54*1.0;
+                const real_t tmp_qloop_56 = tmp_qloop_49*tmp_qloop_55;
+                const real_t tmp_qloop_57 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_51) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_49);
+                const real_t tmp_qloop_58 = pow(tmp_qloop_53, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_59 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                const real_t tmp_qloop_60 = tmp_qloop_51*tmp_qloop_55;
+                const real_t tmp_qloop_61 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                const real_t tmp_qloop_62 = tmp_qloop_49*tmp_qloop_51;
+                const real_t tmp_qloop_63 = abs_det_jac_affine_GRAY*abs((tmp_qloop_0*tmp_qloop_56 - tmp_qloop_61*tmp_qloop_62)*(tmp_qloop_12*tmp_qloop_60 + tmp_qloop_59*tmp_qloop_62) - (tmp_qloop_0*tmp_qloop_60 + tmp_qloop_50*tmp_qloop_61)*(tmp_qloop_12*tmp_qloop_56 - tmp_qloop_52*tmp_qloop_59))*_data_q_w[q];
+                const real_t tmp_qloop_64 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1]);
+                const real_t tmp_qloop_65 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_44;
+                const real_t tmp_qloop_66 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2]);
+                const real_t tmp_qloop_67 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_44;
+                const real_t tmp_qloop_68 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3]);
+                const real_t tmp_qloop_69 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_44;
+                const real_t tmp_qloop_70 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4]);
+                const real_t tmp_qloop_71 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_44;
+                const real_t tmp_qloop_72 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5]);
+                const real_t tmp_qloop_73 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_44;
+                const real_t tmp_qloop_74 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1]);
+                const real_t tmp_qloop_75 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_46);
+                const real_t tmp_qloop_76 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2]);
+                const real_t tmp_qloop_77 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_46);
+                const real_t tmp_qloop_78 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3]);
+                const real_t tmp_qloop_79 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_46);
+                const real_t tmp_qloop_80 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4]);
+                const real_t tmp_qloop_81 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_46);
+                const real_t tmp_qloop_82 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5]);
+                const real_t tmp_qloop_83 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_46);
+                const real_t q_tmp_0_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_38 + tmp_qloop_45*tmp_qloop_48);
+                const real_t q_tmp_0_1 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_65 + tmp_qloop_64);
+                const real_t q_tmp_0_2 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_0_3 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_0_4 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_71 + tmp_qloop_70);
+                const real_t q_tmp_0_5 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_73 + tmp_qloop_72);
+                const real_t q_tmp_1_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_2_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_3_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_4_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_5_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_83 + tmp_qloop_82);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_27*1.0;
+                const real_t tmp_qloop_29 = tmp_qloop_21*tmp_qloop_28;
+                const real_t tmp_qloop_30 = -tmp_qloop_26;
+                const real_t tmp_qloop_31 = tmp_qloop_28*tmp_qloop_30;
+                const real_t tmp_qloop_32 = mu_dof_0*2.0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*2.0*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_33 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q]);
+                const real_t tmp_qloop_34 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q]);
+                const real_t tmp_qloop_35 = tmp_qloop_24*tmp_qloop_28;
+                const real_t tmp_qloop_36 = tmp_qloop_25*tmp_qloop_28;
+                const real_t tmp_qloop_37 = tmp_qloop_32*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q]);
+                const real_t tmp_qloop_38 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q]);
+                const real_t tmp_qloop_39 = tmp_qloop_27*0.5;
+                const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39;
+                const real_t tmp_qloop_41 = tmp_qloop_30*tmp_qloop_39;
+                const real_t tmp_qloop_42 = tmp_qloop_24*tmp_qloop_39;
+                const real_t tmp_qloop_43 = tmp_qloop_25*tmp_qloop_39;
+                const real_t tmp_qloop_44 = tmp_qloop_42*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q];
+                const real_t tmp_qloop_45 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_44;
+                const real_t tmp_qloop_46 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q];
+                const real_t tmp_qloop_47 = tmp_qloop_32*2.0;
+                const real_t tmp_qloop_48 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_46);
+                const real_t tmp_qloop_49 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_50 = (tmp_qloop_49*tmp_qloop_49);
+                const real_t tmp_qloop_51 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_52 = (tmp_qloop_51*tmp_qloop_51);
+                const real_t tmp_qloop_53 = tmp_qloop_50 + tmp_qloop_52;
+                const real_t tmp_qloop_55 = pow(tmp_qloop_53, -0.50000000000000000)*tmp_qloop_54*1.0;
+                const real_t tmp_qloop_56 = tmp_qloop_49*tmp_qloop_55;
+                const real_t tmp_qloop_57 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_51) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_49);
+                const real_t tmp_qloop_58 = pow(tmp_qloop_53, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_59 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                const real_t tmp_qloop_60 = tmp_qloop_51*tmp_qloop_55;
+                const real_t tmp_qloop_61 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                const real_t tmp_qloop_62 = tmp_qloop_49*tmp_qloop_51;
+                const real_t tmp_qloop_63 = abs_det_jac_affine_BLUE*abs((tmp_qloop_0*tmp_qloop_56 - tmp_qloop_61*tmp_qloop_62)*(tmp_qloop_12*tmp_qloop_60 + tmp_qloop_59*tmp_qloop_62) - (tmp_qloop_0*tmp_qloop_60 + tmp_qloop_50*tmp_qloop_61)*(tmp_qloop_12*tmp_qloop_56 - tmp_qloop_52*tmp_qloop_59))*_data_q_w[q];
+                const real_t tmp_qloop_64 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1]);
+                const real_t tmp_qloop_65 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_44;
+                const real_t tmp_qloop_66 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2]);
+                const real_t tmp_qloop_67 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_44;
+                const real_t tmp_qloop_68 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3]);
+                const real_t tmp_qloop_69 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_44;
+                const real_t tmp_qloop_70 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4]);
+                const real_t tmp_qloop_71 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_44;
+                const real_t tmp_qloop_72 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5]);
+                const real_t tmp_qloop_73 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_44;
+                const real_t tmp_qloop_74 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1]);
+                const real_t tmp_qloop_75 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_46);
+                const real_t tmp_qloop_76 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2]);
+                const real_t tmp_qloop_77 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_46);
+                const real_t tmp_qloop_78 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3]);
+                const real_t tmp_qloop_79 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_46);
+                const real_t tmp_qloop_80 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4]);
+                const real_t tmp_qloop_81 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_46);
+                const real_t tmp_qloop_82 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5]);
+                const real_t tmp_qloop_83 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_46);
+                const real_t q_tmp_0_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_38 + tmp_qloop_45*tmp_qloop_48);
+                const real_t q_tmp_0_1 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_65 + tmp_qloop_64);
+                const real_t q_tmp_0_2 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_0_3 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_0_4 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_71 + tmp_qloop_70);
+                const real_t q_tmp_0_5 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_73 + tmp_qloop_72);
+                const real_t q_tmp_1_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_2_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_3_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_4_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_5_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_83 + tmp_qloop_82);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0cc3280f9e33623f54bb5052c5438695ebedf625
--- /dev/null
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_macro_2D.cpp
@@ -0,0 +1,787 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseEpsilonAnnulusMap_0_1.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseEpsilonAnnulusMap_0_1::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (tmp_qloop_1*tmp_qloop_14 - tmp_qloop_11*tmp_qloop_13);
+       const real_t tmp_qloop_54 = tmp_qloop_15*1.0 / (-tmp_qloop_0*tmp_qloop_14 + tmp_qloop_11*tmp_qloop_12);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_27*1.0;
+                const real_t tmp_qloop_29 = tmp_qloop_21*tmp_qloop_28;
+                const real_t tmp_qloop_30 = -tmp_qloop_26;
+                const real_t tmp_qloop_31 = tmp_qloop_28*tmp_qloop_30;
+                const real_t tmp_qloop_32 = mu_dof_0*2.0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*2.0*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_33 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q]);
+                const real_t tmp_qloop_34 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q]);
+                const real_t tmp_qloop_35 = tmp_qloop_24*tmp_qloop_28;
+                const real_t tmp_qloop_36 = tmp_qloop_25*tmp_qloop_28;
+                const real_t tmp_qloop_37 = tmp_qloop_32*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q]);
+                const real_t tmp_qloop_38 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q]);
+                const real_t tmp_qloop_39 = tmp_qloop_27*0.5;
+                const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39;
+                const real_t tmp_qloop_41 = tmp_qloop_30*tmp_qloop_39;
+                const real_t tmp_qloop_42 = tmp_qloop_24*tmp_qloop_39;
+                const real_t tmp_qloop_43 = tmp_qloop_25*tmp_qloop_39;
+                const real_t tmp_qloop_44 = tmp_qloop_42*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q];
+                const real_t tmp_qloop_45 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_44;
+                const real_t tmp_qloop_46 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q];
+                const real_t tmp_qloop_47 = tmp_qloop_32*2.0;
+                const real_t tmp_qloop_48 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_46);
+                const real_t tmp_qloop_49 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_50 = (tmp_qloop_49*tmp_qloop_49);
+                const real_t tmp_qloop_51 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_52 = (tmp_qloop_51*tmp_qloop_51);
+                const real_t tmp_qloop_53 = tmp_qloop_50 + tmp_qloop_52;
+                const real_t tmp_qloop_55 = pow(tmp_qloop_53, -0.50000000000000000)*tmp_qloop_54*1.0;
+                const real_t tmp_qloop_56 = tmp_qloop_49*tmp_qloop_55;
+                const real_t tmp_qloop_57 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_51) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_49);
+                const real_t tmp_qloop_58 = pow(tmp_qloop_53, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_59 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                const real_t tmp_qloop_60 = tmp_qloop_51*tmp_qloop_55;
+                const real_t tmp_qloop_61 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                const real_t tmp_qloop_62 = tmp_qloop_49*tmp_qloop_51;
+                const real_t tmp_qloop_63 = abs_det_jac_affine_GRAY*abs((tmp_qloop_0*tmp_qloop_56 - tmp_qloop_61*tmp_qloop_62)*(tmp_qloop_12*tmp_qloop_60 + tmp_qloop_59*tmp_qloop_62) - (tmp_qloop_0*tmp_qloop_60 + tmp_qloop_50*tmp_qloop_61)*(tmp_qloop_12*tmp_qloop_56 - tmp_qloop_52*tmp_qloop_59))*_data_q_w[q];
+                const real_t tmp_qloop_64 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1]);
+                const real_t tmp_qloop_65 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_44;
+                const real_t tmp_qloop_66 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2]);
+                const real_t tmp_qloop_67 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_44;
+                const real_t tmp_qloop_68 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3]);
+                const real_t tmp_qloop_69 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_44;
+                const real_t tmp_qloop_70 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4]);
+                const real_t tmp_qloop_71 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_44;
+                const real_t tmp_qloop_72 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5]);
+                const real_t tmp_qloop_73 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_44;
+                const real_t tmp_qloop_74 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1]);
+                const real_t tmp_qloop_75 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_46);
+                const real_t tmp_qloop_76 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2]);
+                const real_t tmp_qloop_77 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_46);
+                const real_t tmp_qloop_78 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3]);
+                const real_t tmp_qloop_79 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_46);
+                const real_t tmp_qloop_80 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4]);
+                const real_t tmp_qloop_81 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_46);
+                const real_t tmp_qloop_82 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5]);
+                const real_t tmp_qloop_83 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_46);
+                const real_t q_tmp_0_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_38 + tmp_qloop_45*tmp_qloop_48);
+                const real_t q_tmp_0_1 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_65 + tmp_qloop_64);
+                const real_t q_tmp_0_2 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_0_3 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_0_4 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_71 + tmp_qloop_70);
+                const real_t q_tmp_0_5 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_73 + tmp_qloop_72);
+                const real_t q_tmp_1_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_2_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_3_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_4_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_5_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_83 + tmp_qloop_82);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_1_0;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_2_0;
+             const real_t elMat_2_1 = q_acc_2_1;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_3_0;
+             const real_t elMat_3_1 = q_acc_3_1;
+             const real_t elMat_3_2 = q_acc_3_2;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_4_0;
+             const real_t elMat_4_1 = q_acc_4_1;
+             const real_t elMat_4_2 = q_acc_4_2;
+             const real_t elMat_4_3 = q_acc_4_3;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_5_0;
+             const real_t elMat_5_1 = q_acc_5_1;
+             const real_t elMat_5_2 = q_acc_5_2;
+             const real_t elMat_5_3 = q_acc_5_3;
+             const real_t elMat_5_4 = q_acc_5_4;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_27*1.0;
+                const real_t tmp_qloop_29 = tmp_qloop_21*tmp_qloop_28;
+                const real_t tmp_qloop_30 = -tmp_qloop_26;
+                const real_t tmp_qloop_31 = tmp_qloop_28*tmp_qloop_30;
+                const real_t tmp_qloop_32 = mu_dof_0*2.0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*2.0*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_33 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q]);
+                const real_t tmp_qloop_34 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q]);
+                const real_t tmp_qloop_35 = tmp_qloop_24*tmp_qloop_28;
+                const real_t tmp_qloop_36 = tmp_qloop_25*tmp_qloop_28;
+                const real_t tmp_qloop_37 = tmp_qloop_32*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q]);
+                const real_t tmp_qloop_38 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q]);
+                const real_t tmp_qloop_39 = tmp_qloop_27*0.5;
+                const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39;
+                const real_t tmp_qloop_41 = tmp_qloop_30*tmp_qloop_39;
+                const real_t tmp_qloop_42 = tmp_qloop_24*tmp_qloop_39;
+                const real_t tmp_qloop_43 = tmp_qloop_25*tmp_qloop_39;
+                const real_t tmp_qloop_44 = tmp_qloop_42*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q];
+                const real_t tmp_qloop_45 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_44;
+                const real_t tmp_qloop_46 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q];
+                const real_t tmp_qloop_47 = tmp_qloop_32*2.0;
+                const real_t tmp_qloop_48 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_46);
+                const real_t tmp_qloop_49 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_50 = (tmp_qloop_49*tmp_qloop_49);
+                const real_t tmp_qloop_51 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_52 = (tmp_qloop_51*tmp_qloop_51);
+                const real_t tmp_qloop_53 = tmp_qloop_50 + tmp_qloop_52;
+                const real_t tmp_qloop_55 = pow(tmp_qloop_53, -0.50000000000000000)*tmp_qloop_54*1.0;
+                const real_t tmp_qloop_56 = tmp_qloop_49*tmp_qloop_55;
+                const real_t tmp_qloop_57 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_51) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_49);
+                const real_t tmp_qloop_58 = pow(tmp_qloop_53, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_59 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                const real_t tmp_qloop_60 = tmp_qloop_51*tmp_qloop_55;
+                const real_t tmp_qloop_61 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                const real_t tmp_qloop_62 = tmp_qloop_49*tmp_qloop_51;
+                const real_t tmp_qloop_63 = abs_det_jac_affine_BLUE*abs((tmp_qloop_0*tmp_qloop_56 - tmp_qloop_61*tmp_qloop_62)*(tmp_qloop_12*tmp_qloop_60 + tmp_qloop_59*tmp_qloop_62) - (tmp_qloop_0*tmp_qloop_60 + tmp_qloop_50*tmp_qloop_61)*(tmp_qloop_12*tmp_qloop_56 - tmp_qloop_52*tmp_qloop_59))*_data_q_w[q];
+                const real_t tmp_qloop_64 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1]);
+                const real_t tmp_qloop_65 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_44;
+                const real_t tmp_qloop_66 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2]);
+                const real_t tmp_qloop_67 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_44;
+                const real_t tmp_qloop_68 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3]);
+                const real_t tmp_qloop_69 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_44;
+                const real_t tmp_qloop_70 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4]);
+                const real_t tmp_qloop_71 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_44;
+                const real_t tmp_qloop_72 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5]);
+                const real_t tmp_qloop_73 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_44;
+                const real_t tmp_qloop_74 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1]);
+                const real_t tmp_qloop_75 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_46);
+                const real_t tmp_qloop_76 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2]);
+                const real_t tmp_qloop_77 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_46);
+                const real_t tmp_qloop_78 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3]);
+                const real_t tmp_qloop_79 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_46);
+                const real_t tmp_qloop_80 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4]);
+                const real_t tmp_qloop_81 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_46);
+                const real_t tmp_qloop_82 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5]);
+                const real_t tmp_qloop_83 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_46);
+                const real_t q_tmp_0_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_38 + tmp_qloop_45*tmp_qloop_48);
+                const real_t q_tmp_0_1 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_65 + tmp_qloop_64);
+                const real_t q_tmp_0_2 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_0_3 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_0_4 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_71 + tmp_qloop_70);
+                const real_t q_tmp_0_5 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_48*tmp_qloop_73 + tmp_qloop_72);
+                const real_t q_tmp_1_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_2_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_3_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_4_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_5_0 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_45*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_83 + tmp_qloop_82);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_1_0;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_2_0;
+             const real_t elMat_2_1 = q_acc_2_1;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_3_0;
+             const real_t elMat_3_1 = q_acc_3_1;
+             const real_t elMat_3_2 = q_acc_3_2;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_4_0;
+             const real_t elMat_4_1 = q_acc_4_1;
+             const real_t elMat_4_2 = q_acc_4_2;
+             const real_t elMat_4_3 = q_acc_4_3;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_5_0;
+             const real_t elMat_5_1 = q_acc_5_1;
+             const real_t elMat_5_2 = q_acc_5_2;
+             const real_t elMat_5_3 = q_acc_5_3;
+             const real_t elMat_5_4 = q_acc_5_4;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c77c7bfd870120702b2ddf52130e389e2738e6c0
--- /dev/null
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp
@@ -0,0 +1,629 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseEpsilonAnnulusMap_1_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseEpsilonAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (tmp_qloop_1*tmp_qloop_14 - tmp_qloop_11*tmp_qloop_13);
+       const real_t tmp_qloop_54 = tmp_qloop_15*1.0 / (-tmp_qloop_0*tmp_qloop_14 + tmp_qloop_11*tmp_qloop_12);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_27*1.0;
+                const real_t tmp_qloop_29 = tmp_qloop_21*tmp_qloop_28;
+                const real_t tmp_qloop_30 = -tmp_qloop_26;
+                const real_t tmp_qloop_31 = tmp_qloop_28*tmp_qloop_30;
+                const real_t tmp_qloop_32 = mu_dof_0*2.0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*2.0*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_33 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q]);
+                const real_t tmp_qloop_34 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q]);
+                const real_t tmp_qloop_35 = tmp_qloop_24*tmp_qloop_28;
+                const real_t tmp_qloop_36 = tmp_qloop_25*tmp_qloop_28;
+                const real_t tmp_qloop_37 = tmp_qloop_32*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q]);
+                const real_t tmp_qloop_38 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q]);
+                const real_t tmp_qloop_39 = tmp_qloop_27*0.5;
+                const real_t tmp_qloop_40 = tmp_qloop_24*tmp_qloop_39;
+                const real_t tmp_qloop_41 = tmp_qloop_25*tmp_qloop_39;
+                const real_t tmp_qloop_42 = tmp_qloop_21*tmp_qloop_39;
+                const real_t tmp_qloop_43 = tmp_qloop_30*tmp_qloop_39;
+                const real_t tmp_qloop_44 = tmp_qloop_42*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q];
+                const real_t tmp_qloop_45 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_44;
+                const real_t tmp_qloop_46 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q];
+                const real_t tmp_qloop_47 = tmp_qloop_32*2.0;
+                const real_t tmp_qloop_48 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_46);
+                const real_t tmp_qloop_49 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_50 = (tmp_qloop_49*tmp_qloop_49);
+                const real_t tmp_qloop_51 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_52 = (tmp_qloop_51*tmp_qloop_51);
+                const real_t tmp_qloop_53 = tmp_qloop_50 + tmp_qloop_52;
+                const real_t tmp_qloop_55 = pow(tmp_qloop_53, -0.50000000000000000)*tmp_qloop_54*1.0;
+                const real_t tmp_qloop_56 = tmp_qloop_49*tmp_qloop_55;
+                const real_t tmp_qloop_57 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_51) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_49);
+                const real_t tmp_qloop_58 = pow(tmp_qloop_53, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_59 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                const real_t tmp_qloop_60 = tmp_qloop_51*tmp_qloop_55;
+                const real_t tmp_qloop_61 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                const real_t tmp_qloop_62 = tmp_qloop_49*tmp_qloop_51;
+                const real_t tmp_qloop_63 = abs_det_jac_affine_GRAY*abs((tmp_qloop_0*tmp_qloop_56 - tmp_qloop_61*tmp_qloop_62)*(tmp_qloop_12*tmp_qloop_60 + tmp_qloop_59*tmp_qloop_62) - (tmp_qloop_0*tmp_qloop_60 + tmp_qloop_50*tmp_qloop_61)*(tmp_qloop_12*tmp_qloop_56 - tmp_qloop_52*tmp_qloop_59))*_data_q_w[q];
+                const real_t tmp_qloop_64 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1]);
+                const real_t tmp_qloop_65 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_44;
+                const real_t tmp_qloop_66 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2]);
+                const real_t tmp_qloop_67 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_44;
+                const real_t tmp_qloop_68 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3]);
+                const real_t tmp_qloop_69 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_44;
+                const real_t tmp_qloop_70 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4]);
+                const real_t tmp_qloop_71 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_44;
+                const real_t tmp_qloop_72 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5]);
+                const real_t tmp_qloop_73 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_44;
+                const real_t tmp_qloop_74 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1]);
+                const real_t tmp_qloop_75 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_46);
+                const real_t tmp_qloop_76 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2]);
+                const real_t tmp_qloop_77 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_46);
+                const real_t tmp_qloop_78 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3]);
+                const real_t tmp_qloop_79 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_46);
+                const real_t tmp_qloop_80 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4]);
+                const real_t tmp_qloop_81 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_46);
+                const real_t tmp_qloop_82 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5]);
+                const real_t tmp_qloop_83 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_46);
+                const real_t q_tmp_0_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_38 + tmp_qloop_45*tmp_qloop_48);
+                const real_t q_tmp_0_1 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_65 + tmp_qloop_64);
+                const real_t q_tmp_0_2 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_0_3 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_0_4 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_71 + tmp_qloop_70);
+                const real_t q_tmp_0_5 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_73 + tmp_qloop_72);
+                const real_t q_tmp_1_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_2_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_3_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_4_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_5_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_83 + tmp_qloop_82);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_27*1.0;
+                const real_t tmp_qloop_29 = tmp_qloop_21*tmp_qloop_28;
+                const real_t tmp_qloop_30 = -tmp_qloop_26;
+                const real_t tmp_qloop_31 = tmp_qloop_28*tmp_qloop_30;
+                const real_t tmp_qloop_32 = mu_dof_0*2.0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*2.0*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_33 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q]);
+                const real_t tmp_qloop_34 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q]);
+                const real_t tmp_qloop_35 = tmp_qloop_24*tmp_qloop_28;
+                const real_t tmp_qloop_36 = tmp_qloop_25*tmp_qloop_28;
+                const real_t tmp_qloop_37 = tmp_qloop_32*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q]);
+                const real_t tmp_qloop_38 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q]);
+                const real_t tmp_qloop_39 = tmp_qloop_27*0.5;
+                const real_t tmp_qloop_40 = tmp_qloop_24*tmp_qloop_39;
+                const real_t tmp_qloop_41 = tmp_qloop_25*tmp_qloop_39;
+                const real_t tmp_qloop_42 = tmp_qloop_21*tmp_qloop_39;
+                const real_t tmp_qloop_43 = tmp_qloop_30*tmp_qloop_39;
+                const real_t tmp_qloop_44 = tmp_qloop_42*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q];
+                const real_t tmp_qloop_45 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_44;
+                const real_t tmp_qloop_46 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q];
+                const real_t tmp_qloop_47 = tmp_qloop_32*2.0;
+                const real_t tmp_qloop_48 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_46);
+                const real_t tmp_qloop_49 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_50 = (tmp_qloop_49*tmp_qloop_49);
+                const real_t tmp_qloop_51 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_52 = (tmp_qloop_51*tmp_qloop_51);
+                const real_t tmp_qloop_53 = tmp_qloop_50 + tmp_qloop_52;
+                const real_t tmp_qloop_55 = pow(tmp_qloop_53, -0.50000000000000000)*tmp_qloop_54*1.0;
+                const real_t tmp_qloop_56 = tmp_qloop_49*tmp_qloop_55;
+                const real_t tmp_qloop_57 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_51) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_49);
+                const real_t tmp_qloop_58 = pow(tmp_qloop_53, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_59 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                const real_t tmp_qloop_60 = tmp_qloop_51*tmp_qloop_55;
+                const real_t tmp_qloop_61 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                const real_t tmp_qloop_62 = tmp_qloop_49*tmp_qloop_51;
+                const real_t tmp_qloop_63 = abs_det_jac_affine_BLUE*abs((tmp_qloop_0*tmp_qloop_56 - tmp_qloop_61*tmp_qloop_62)*(tmp_qloop_12*tmp_qloop_60 + tmp_qloop_59*tmp_qloop_62) - (tmp_qloop_0*tmp_qloop_60 + tmp_qloop_50*tmp_qloop_61)*(tmp_qloop_12*tmp_qloop_56 - tmp_qloop_52*tmp_qloop_59))*_data_q_w[q];
+                const real_t tmp_qloop_64 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1]);
+                const real_t tmp_qloop_65 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_44;
+                const real_t tmp_qloop_66 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2]);
+                const real_t tmp_qloop_67 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_44;
+                const real_t tmp_qloop_68 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3]);
+                const real_t tmp_qloop_69 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_44;
+                const real_t tmp_qloop_70 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4]);
+                const real_t tmp_qloop_71 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_44;
+                const real_t tmp_qloop_72 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5]);
+                const real_t tmp_qloop_73 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_44;
+                const real_t tmp_qloop_74 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1]);
+                const real_t tmp_qloop_75 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_46);
+                const real_t tmp_qloop_76 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2]);
+                const real_t tmp_qloop_77 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_46);
+                const real_t tmp_qloop_78 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3]);
+                const real_t tmp_qloop_79 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_46);
+                const real_t tmp_qloop_80 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4]);
+                const real_t tmp_qloop_81 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_46);
+                const real_t tmp_qloop_82 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5]);
+                const real_t tmp_qloop_83 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_46);
+                const real_t q_tmp_0_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_38 + tmp_qloop_45*tmp_qloop_48);
+                const real_t q_tmp_0_1 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_65 + tmp_qloop_64);
+                const real_t q_tmp_0_2 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_0_3 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_0_4 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_71 + tmp_qloop_70);
+                const real_t q_tmp_0_5 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_73 + tmp_qloop_72);
+                const real_t q_tmp_1_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_2_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_3_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_4_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_5_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_83 + tmp_qloop_82);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2c108806281dafcc36fe5be795149f270d10c5d0
--- /dev/null
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_macro_2D.cpp
@@ -0,0 +1,787 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseEpsilonAnnulusMap_1_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseEpsilonAnnulusMap_1_0::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (tmp_qloop_1*tmp_qloop_14 - tmp_qloop_11*tmp_qloop_13);
+       const real_t tmp_qloop_54 = tmp_qloop_15*1.0 / (-tmp_qloop_0*tmp_qloop_14 + tmp_qloop_11*tmp_qloop_12);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_27*1.0;
+                const real_t tmp_qloop_29 = tmp_qloop_21*tmp_qloop_28;
+                const real_t tmp_qloop_30 = -tmp_qloop_26;
+                const real_t tmp_qloop_31 = tmp_qloop_28*tmp_qloop_30;
+                const real_t tmp_qloop_32 = mu_dof_0*2.0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*2.0*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_33 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q]);
+                const real_t tmp_qloop_34 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q]);
+                const real_t tmp_qloop_35 = tmp_qloop_24*tmp_qloop_28;
+                const real_t tmp_qloop_36 = tmp_qloop_25*tmp_qloop_28;
+                const real_t tmp_qloop_37 = tmp_qloop_32*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q]);
+                const real_t tmp_qloop_38 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q]);
+                const real_t tmp_qloop_39 = tmp_qloop_27*0.5;
+                const real_t tmp_qloop_40 = tmp_qloop_24*tmp_qloop_39;
+                const real_t tmp_qloop_41 = tmp_qloop_25*tmp_qloop_39;
+                const real_t tmp_qloop_42 = tmp_qloop_21*tmp_qloop_39;
+                const real_t tmp_qloop_43 = tmp_qloop_30*tmp_qloop_39;
+                const real_t tmp_qloop_44 = tmp_qloop_42*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q];
+                const real_t tmp_qloop_45 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_44;
+                const real_t tmp_qloop_46 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q];
+                const real_t tmp_qloop_47 = tmp_qloop_32*2.0;
+                const real_t tmp_qloop_48 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_46);
+                const real_t tmp_qloop_49 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_50 = (tmp_qloop_49*tmp_qloop_49);
+                const real_t tmp_qloop_51 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_52 = (tmp_qloop_51*tmp_qloop_51);
+                const real_t tmp_qloop_53 = tmp_qloop_50 + tmp_qloop_52;
+                const real_t tmp_qloop_55 = pow(tmp_qloop_53, -0.50000000000000000)*tmp_qloop_54*1.0;
+                const real_t tmp_qloop_56 = tmp_qloop_49*tmp_qloop_55;
+                const real_t tmp_qloop_57 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_51) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_49);
+                const real_t tmp_qloop_58 = pow(tmp_qloop_53, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_59 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                const real_t tmp_qloop_60 = tmp_qloop_51*tmp_qloop_55;
+                const real_t tmp_qloop_61 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                const real_t tmp_qloop_62 = tmp_qloop_49*tmp_qloop_51;
+                const real_t tmp_qloop_63 = abs_det_jac_affine_GRAY*abs((tmp_qloop_0*tmp_qloop_56 - tmp_qloop_61*tmp_qloop_62)*(tmp_qloop_12*tmp_qloop_60 + tmp_qloop_59*tmp_qloop_62) - (tmp_qloop_0*tmp_qloop_60 + tmp_qloop_50*tmp_qloop_61)*(tmp_qloop_12*tmp_qloop_56 - tmp_qloop_52*tmp_qloop_59))*_data_q_w[q];
+                const real_t tmp_qloop_64 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1]);
+                const real_t tmp_qloop_65 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_44;
+                const real_t tmp_qloop_66 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2]);
+                const real_t tmp_qloop_67 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_44;
+                const real_t tmp_qloop_68 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3]);
+                const real_t tmp_qloop_69 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_44;
+                const real_t tmp_qloop_70 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4]);
+                const real_t tmp_qloop_71 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_44;
+                const real_t tmp_qloop_72 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5]);
+                const real_t tmp_qloop_73 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_44;
+                const real_t tmp_qloop_74 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1]);
+                const real_t tmp_qloop_75 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_46);
+                const real_t tmp_qloop_76 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2]);
+                const real_t tmp_qloop_77 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_46);
+                const real_t tmp_qloop_78 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3]);
+                const real_t tmp_qloop_79 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_46);
+                const real_t tmp_qloop_80 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4]);
+                const real_t tmp_qloop_81 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_46);
+                const real_t tmp_qloop_82 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5]);
+                const real_t tmp_qloop_83 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_46);
+                const real_t q_tmp_0_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_38 + tmp_qloop_45*tmp_qloop_48);
+                const real_t q_tmp_0_1 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_65 + tmp_qloop_64);
+                const real_t q_tmp_0_2 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_0_3 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_0_4 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_71 + tmp_qloop_70);
+                const real_t q_tmp_0_5 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_73 + tmp_qloop_72);
+                const real_t q_tmp_1_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_2_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_3_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_4_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_5_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_83 + tmp_qloop_82);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_1_0;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_2_0;
+             const real_t elMat_2_1 = q_acc_2_1;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_3_0;
+             const real_t elMat_3_1 = q_acc_3_1;
+             const real_t elMat_3_2 = q_acc_3_2;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_4_0;
+             const real_t elMat_4_1 = q_acc_4_1;
+             const real_t elMat_4_2 = q_acc_4_2;
+             const real_t elMat_4_3 = q_acc_4_3;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_5_0;
+             const real_t elMat_5_1 = q_acc_5_1;
+             const real_t elMat_5_2 = q_acc_5_2;
+             const real_t elMat_5_3 = q_acc_5_3;
+             const real_t elMat_5_4 = q_acc_5_4;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_27*1.0;
+                const real_t tmp_qloop_29 = tmp_qloop_21*tmp_qloop_28;
+                const real_t tmp_qloop_30 = -tmp_qloop_26;
+                const real_t tmp_qloop_31 = tmp_qloop_28*tmp_qloop_30;
+                const real_t tmp_qloop_32 = mu_dof_0*2.0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*2.0*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_33 = tmp_qloop_32*(tmp_qloop_29*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q]);
+                const real_t tmp_qloop_34 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q]);
+                const real_t tmp_qloop_35 = tmp_qloop_24*tmp_qloop_28;
+                const real_t tmp_qloop_36 = tmp_qloop_25*tmp_qloop_28;
+                const real_t tmp_qloop_37 = tmp_qloop_32*(tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q]);
+                const real_t tmp_qloop_38 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q]);
+                const real_t tmp_qloop_39 = tmp_qloop_27*0.5;
+                const real_t tmp_qloop_40 = tmp_qloop_24*tmp_qloop_39;
+                const real_t tmp_qloop_41 = tmp_qloop_25*tmp_qloop_39;
+                const real_t tmp_qloop_42 = tmp_qloop_21*tmp_qloop_39;
+                const real_t tmp_qloop_43 = tmp_qloop_30*tmp_qloop_39;
+                const real_t tmp_qloop_44 = tmp_qloop_42*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q];
+                const real_t tmp_qloop_45 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_44;
+                const real_t tmp_qloop_46 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q];
+                const real_t tmp_qloop_47 = tmp_qloop_32*2.0;
+                const real_t tmp_qloop_48 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_46);
+                const real_t tmp_qloop_49 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_50 = (tmp_qloop_49*tmp_qloop_49);
+                const real_t tmp_qloop_51 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_52 = (tmp_qloop_51*tmp_qloop_51);
+                const real_t tmp_qloop_53 = tmp_qloop_50 + tmp_qloop_52;
+                const real_t tmp_qloop_55 = pow(tmp_qloop_53, -0.50000000000000000)*tmp_qloop_54*1.0;
+                const real_t tmp_qloop_56 = tmp_qloop_49*tmp_qloop_55;
+                const real_t tmp_qloop_57 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_51) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_49);
+                const real_t tmp_qloop_58 = pow(tmp_qloop_53, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_59 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                const real_t tmp_qloop_60 = tmp_qloop_51*tmp_qloop_55;
+                const real_t tmp_qloop_61 = tmp_qloop_58*(radRayVertex + tmp_qloop_54*tmp_qloop_57);
+                const real_t tmp_qloop_62 = tmp_qloop_49*tmp_qloop_51;
+                const real_t tmp_qloop_63 = abs_det_jac_affine_BLUE*abs((tmp_qloop_0*tmp_qloop_56 - tmp_qloop_61*tmp_qloop_62)*(tmp_qloop_12*tmp_qloop_60 + tmp_qloop_59*tmp_qloop_62) - (tmp_qloop_0*tmp_qloop_60 + tmp_qloop_50*tmp_qloop_61)*(tmp_qloop_12*tmp_qloop_56 - tmp_qloop_52*tmp_qloop_59))*_data_q_w[q];
+                const real_t tmp_qloop_64 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1]);
+                const real_t tmp_qloop_65 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_44;
+                const real_t tmp_qloop_66 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2]);
+                const real_t tmp_qloop_67 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_44;
+                const real_t tmp_qloop_68 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3]);
+                const real_t tmp_qloop_69 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_44;
+                const real_t tmp_qloop_70 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4]);
+                const real_t tmp_qloop_71 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_44;
+                const real_t tmp_qloop_72 = tmp_qloop_33*(tmp_qloop_29*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_31*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5]);
+                const real_t tmp_qloop_73 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_44;
+                const real_t tmp_qloop_74 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1]);
+                const real_t tmp_qloop_75 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_46);
+                const real_t tmp_qloop_76 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2]);
+                const real_t tmp_qloop_77 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_46);
+                const real_t tmp_qloop_78 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3]);
+                const real_t tmp_qloop_79 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_46);
+                const real_t tmp_qloop_80 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4]);
+                const real_t tmp_qloop_81 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_46);
+                const real_t tmp_qloop_82 = tmp_qloop_37*(tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5]);
+                const real_t tmp_qloop_83 = tmp_qloop_47*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_46);
+                const real_t q_tmp_0_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_38 + tmp_qloop_45*tmp_qloop_48);
+                const real_t q_tmp_0_1 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_65 + tmp_qloop_64);
+                const real_t q_tmp_0_2 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_0_3 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_0_4 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_71 + tmp_qloop_70);
+                const real_t q_tmp_0_5 = tmp_qloop_63*(tmp_qloop_38 + tmp_qloop_48*tmp_qloop_73 + tmp_qloop_72);
+                const real_t q_tmp_1_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_1_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_75 + tmp_qloop_74);
+                const real_t q_tmp_2_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_2_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_3_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_4_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_4_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_81 + tmp_qloop_80);
+                const real_t q_tmp_5_0 = tmp_qloop_63*(tmp_qloop_34 + tmp_qloop_45*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_1 = tmp_qloop_63*(tmp_qloop_64 + tmp_qloop_65*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_2 = tmp_qloop_63*(tmp_qloop_66 + tmp_qloop_67*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_3 = tmp_qloop_63*(tmp_qloop_68 + tmp_qloop_69*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_4 = tmp_qloop_63*(tmp_qloop_70 + tmp_qloop_71*tmp_qloop_83 + tmp_qloop_82);
+                const real_t q_tmp_5_5 = tmp_qloop_63*(tmp_qloop_72 + tmp_qloop_73*tmp_qloop_83 + tmp_qloop_82);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_1_0;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_2_0;
+             const real_t elMat_2_1 = q_acc_2_1;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_3_0;
+             const real_t elMat_3_1 = q_acc_3_1;
+             const real_t elMat_3_2 = q_acc_3_2;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_4_0;
+             const real_t elMat_4_1 = q_acc_4_1;
+             const real_t elMat_4_2 = q_acc_4_2;
+             const real_t elMat_4_3 = q_acc_4_3;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_5_0;
+             const real_t elMat_5_1 = q_acc_5_1;
+             const real_t elMat_5_2 = q_acc_5_2;
+             const real_t elMat_5_3 = q_acc_5_3;
+             const real_t elMat_5_4 = q_acc_5_4;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6113c2e380e5df9d1d1ec1566eb53487db5f47bb
--- /dev/null
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp
@@ -0,0 +1,531 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseEpsilonAnnulusMap_1_1.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseEpsilonAnnulusMap_1_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_2 = -tmp_qloop_1;
+       const real_t tmp_qloop_12 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_13 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_14 = -tmp_qloop_13;
+       const real_t tmp_qloop_15 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_16 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_17 = -tmp_qloop_16*1.0 / (-tmp_qloop_12*tmp_qloop_14 + tmp_qloop_15*tmp_qloop_2);
+       const real_t tmp_qloop_51 = tmp_qloop_16*1.0 / (-tmp_qloop_1*tmp_qloop_15 + tmp_qloop_12*tmp_qloop_13);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_4 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_7 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = mu_dof_0*2.0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*2.0*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_5 = p_affine_0_1 - tmp_qloop_3*_data_q_p_0[q] - tmp_qloop_4*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = p_affine_0_0 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                const real_t tmp_qloop_10 = (tmp_qloop_5*tmp_qloop_5);
+                const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9;
+                const real_t tmp_qloop_18 = pow(tmp_qloop_11, -0.50000000000000000)*tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_18*tmp_qloop_5;
+                const real_t tmp_qloop_20 = pow(tmp_qloop_11, -1.5000000000000000);
+                const real_t tmp_qloop_21 = radRayVertex + tmp_qloop_17*(tmp_qloop_14*(-rayVertex_0 + tmp_qloop_8) - tmp_qloop_2*(-rayVertex_1 + tmp_qloop_5));
+                const real_t tmp_qloop_22 = -tmp_qloop_19*tmp_qloop_2 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_9*1.0;
+                const real_t tmp_qloop_23 = tmp_qloop_18*tmp_qloop_8;
+                const real_t tmp_qloop_24 = tmp_qloop_20*tmp_qloop_21*1.0;
+                const real_t tmp_qloop_25 = tmp_qloop_10*tmp_qloop_24 + tmp_qloop_14*tmp_qloop_23;
+                const real_t tmp_qloop_26 = tmp_qloop_2*tmp_qloop_23 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_5*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_27 = tmp_qloop_14*tmp_qloop_19 - tmp_qloop_24*tmp_qloop_5*tmp_qloop_8;
+                const real_t tmp_qloop_28 = 1.0 / (tmp_qloop_22*tmp_qloop_25 + tmp_qloop_26*tmp_qloop_27);
+                const real_t tmp_qloop_29 = tmp_qloop_28*1.0;
+                const real_t tmp_qloop_30 = tmp_qloop_22*tmp_qloop_29;
+                const real_t tmp_qloop_31 = -tmp_qloop_27;
+                const real_t tmp_qloop_32 = tmp_qloop_29*tmp_qloop_31;
+                const real_t tmp_qloop_33 = tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q]);
+                const real_t tmp_qloop_34 = tmp_qloop_25*tmp_qloop_29;
+                const real_t tmp_qloop_35 = tmp_qloop_26*tmp_qloop_29;
+                const real_t tmp_qloop_36 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q]);
+                const real_t tmp_qloop_37 = tmp_qloop_28*0.5;
+                const real_t tmp_qloop_38 = tmp_qloop_22*tmp_qloop_37;
+                const real_t tmp_qloop_39 = tmp_qloop_31*tmp_qloop_37;
+                const real_t tmp_qloop_40 = tmp_qloop_25*tmp_qloop_37;
+                const real_t tmp_qloop_41 = tmp_qloop_26*tmp_qloop_37;
+                const real_t tmp_qloop_42 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q];
+                const real_t tmp_qloop_43 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q];
+                const real_t tmp_qloop_44 = tmp_qloop_0*2.0;
+                const real_t tmp_qloop_45 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_43);
+                const real_t tmp_qloop_46 = -p_affine_0_0 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                const real_t tmp_qloop_48 = -p_affine_0_1 + tmp_qloop_3*_data_q_p_0[q] + tmp_qloop_4*_data_q_p_1[q];
+                const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                const real_t tmp_qloop_50 = tmp_qloop_47 + tmp_qloop_49;
+                const real_t tmp_qloop_52 = pow(tmp_qloop_50, -0.50000000000000000)*tmp_qloop_51*1.0;
+                const real_t tmp_qloop_53 = tmp_qloop_46*tmp_qloop_52;
+                const real_t tmp_qloop_54 = tmp_qloop_1*(rayVertex_1 + tmp_qloop_48) - tmp_qloop_13*(rayVertex_0 + tmp_qloop_46);
+                const real_t tmp_qloop_55 = pow(tmp_qloop_50, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_56 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_57 = tmp_qloop_48*tmp_qloop_52;
+                const real_t tmp_qloop_58 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_59 = tmp_qloop_46*tmp_qloop_48;
+                const real_t tmp_qloop_60 = abs_det_jac_affine_GRAY*abs((tmp_qloop_1*tmp_qloop_53 - tmp_qloop_58*tmp_qloop_59)*(tmp_qloop_13*tmp_qloop_57 + tmp_qloop_56*tmp_qloop_59) - (tmp_qloop_1*tmp_qloop_57 + tmp_qloop_47*tmp_qloop_58)*(tmp_qloop_13*tmp_qloop_53 - tmp_qloop_49*tmp_qloop_56))*_data_q_w[q];
+                const real_t tmp_qloop_61 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1];
+                const real_t tmp_qloop_62 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_42;
+                const real_t tmp_qloop_63 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2];
+                const real_t tmp_qloop_64 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_42;
+                const real_t tmp_qloop_65 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3];
+                const real_t tmp_qloop_66 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_42;
+                const real_t tmp_qloop_67 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4];
+                const real_t tmp_qloop_68 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_42;
+                const real_t tmp_qloop_69 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5];
+                const real_t tmp_qloop_70 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_42;
+                const real_t tmp_qloop_71 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1]);
+                const real_t tmp_qloop_72 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_43);
+                const real_t tmp_qloop_73 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2]);
+                const real_t tmp_qloop_74 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_43);
+                const real_t tmp_qloop_75 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3]);
+                const real_t tmp_qloop_76 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_43);
+                const real_t tmp_qloop_77 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4]);
+                const real_t tmp_qloop_78 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_43);
+                const real_t q_tmp_0_0 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q]) + tmp_qloop_45*(tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_42));
+                const real_t q_tmp_0_1 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_61 + tmp_qloop_45*tmp_qloop_62);
+                const real_t q_tmp_0_2 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_63 + tmp_qloop_45*tmp_qloop_64);
+                const real_t q_tmp_0_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_65 + tmp_qloop_45*tmp_qloop_66);
+                const real_t q_tmp_0_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_67 + tmp_qloop_45*tmp_qloop_68);
+                const real_t q_tmp_0_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_69 + tmp_qloop_45*tmp_qloop_70);
+                const real_t q_tmp_1_1 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_61*tmp_qloop_71 + tmp_qloop_62*tmp_qloop_72);
+                const real_t q_tmp_1_2 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_63*tmp_qloop_71 + tmp_qloop_64*tmp_qloop_72);
+                const real_t q_tmp_1_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_65*tmp_qloop_71 + tmp_qloop_66*tmp_qloop_72);
+                const real_t q_tmp_1_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_71 + tmp_qloop_68*tmp_qloop_72);
+                const real_t q_tmp_1_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_71 + tmp_qloop_70*tmp_qloop_72);
+                const real_t q_tmp_2_2 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_63*tmp_qloop_73 + tmp_qloop_64*tmp_qloop_74);
+                const real_t q_tmp_2_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_65*tmp_qloop_73 + tmp_qloop_66*tmp_qloop_74);
+                const real_t q_tmp_2_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_73 + tmp_qloop_68*tmp_qloop_74);
+                const real_t q_tmp_2_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_73 + tmp_qloop_70*tmp_qloop_74);
+                const real_t q_tmp_3_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_66*tmp_qloop_76);
+                const real_t q_tmp_3_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_68*tmp_qloop_76);
+                const real_t q_tmp_3_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_70*tmp_qloop_76);
+                const real_t q_tmp_4_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_68*tmp_qloop_78);
+                const real_t q_tmp_4_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_70*tmp_qloop_78);
+                const real_t q_tmp_5_5 = tmp_qloop_60*(tmp_qloop_0*tmp_qloop_69*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5]) + tmp_qloop_33 + tmp_qloop_44*tmp_qloop_70*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_43));
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_0_1*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_0_2*src_dof_0 + q_acc_1_2*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_0_3*src_dof_0 + q_acc_1_3*src_dof_1 + q_acc_2_3*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_0_4*src_dof_0 + q_acc_1_4*src_dof_1 + q_acc_2_4*src_dof_2 + q_acc_3_4*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_0_5*src_dof_0 + q_acc_1_5*src_dof_1 + q_acc_2_5*src_dof_2 + q_acc_3_5*src_dof_3 + q_acc_4_5*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_4 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_7 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = mu_dof_0*2.0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*2.0*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_5 = p_affine_0_1 - tmp_qloop_3*_data_q_p_0[q] - tmp_qloop_4*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = p_affine_0_0 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                const real_t tmp_qloop_10 = (tmp_qloop_5*tmp_qloop_5);
+                const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9;
+                const real_t tmp_qloop_18 = pow(tmp_qloop_11, -0.50000000000000000)*tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_18*tmp_qloop_5;
+                const real_t tmp_qloop_20 = pow(tmp_qloop_11, -1.5000000000000000);
+                const real_t tmp_qloop_21 = radRayVertex + tmp_qloop_17*(tmp_qloop_14*(-rayVertex_0 + tmp_qloop_8) - tmp_qloop_2*(-rayVertex_1 + tmp_qloop_5));
+                const real_t tmp_qloop_22 = -tmp_qloop_19*tmp_qloop_2 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_9*1.0;
+                const real_t tmp_qloop_23 = tmp_qloop_18*tmp_qloop_8;
+                const real_t tmp_qloop_24 = tmp_qloop_20*tmp_qloop_21*1.0;
+                const real_t tmp_qloop_25 = tmp_qloop_10*tmp_qloop_24 + tmp_qloop_14*tmp_qloop_23;
+                const real_t tmp_qloop_26 = tmp_qloop_2*tmp_qloop_23 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_5*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_27 = tmp_qloop_14*tmp_qloop_19 - tmp_qloop_24*tmp_qloop_5*tmp_qloop_8;
+                const real_t tmp_qloop_28 = 1.0 / (tmp_qloop_22*tmp_qloop_25 + tmp_qloop_26*tmp_qloop_27);
+                const real_t tmp_qloop_29 = tmp_qloop_28*1.0;
+                const real_t tmp_qloop_30 = tmp_qloop_22*tmp_qloop_29;
+                const real_t tmp_qloop_31 = -tmp_qloop_27;
+                const real_t tmp_qloop_32 = tmp_qloop_29*tmp_qloop_31;
+                const real_t tmp_qloop_33 = tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q]);
+                const real_t tmp_qloop_34 = tmp_qloop_25*tmp_qloop_29;
+                const real_t tmp_qloop_35 = tmp_qloop_26*tmp_qloop_29;
+                const real_t tmp_qloop_36 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q]);
+                const real_t tmp_qloop_37 = tmp_qloop_28*0.5;
+                const real_t tmp_qloop_38 = tmp_qloop_22*tmp_qloop_37;
+                const real_t tmp_qloop_39 = tmp_qloop_31*tmp_qloop_37;
+                const real_t tmp_qloop_40 = tmp_qloop_25*tmp_qloop_37;
+                const real_t tmp_qloop_41 = tmp_qloop_26*tmp_qloop_37;
+                const real_t tmp_qloop_42 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q];
+                const real_t tmp_qloop_43 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q];
+                const real_t tmp_qloop_44 = tmp_qloop_0*2.0;
+                const real_t tmp_qloop_45 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_43);
+                const real_t tmp_qloop_46 = -p_affine_0_0 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                const real_t tmp_qloop_48 = -p_affine_0_1 + tmp_qloop_3*_data_q_p_0[q] + tmp_qloop_4*_data_q_p_1[q];
+                const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                const real_t tmp_qloop_50 = tmp_qloop_47 + tmp_qloop_49;
+                const real_t tmp_qloop_52 = pow(tmp_qloop_50, -0.50000000000000000)*tmp_qloop_51*1.0;
+                const real_t tmp_qloop_53 = tmp_qloop_46*tmp_qloop_52;
+                const real_t tmp_qloop_54 = tmp_qloop_1*(rayVertex_1 + tmp_qloop_48) - tmp_qloop_13*(rayVertex_0 + tmp_qloop_46);
+                const real_t tmp_qloop_55 = pow(tmp_qloop_50, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_56 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_57 = tmp_qloop_48*tmp_qloop_52;
+                const real_t tmp_qloop_58 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_59 = tmp_qloop_46*tmp_qloop_48;
+                const real_t tmp_qloop_60 = abs_det_jac_affine_BLUE*abs((tmp_qloop_1*tmp_qloop_53 - tmp_qloop_58*tmp_qloop_59)*(tmp_qloop_13*tmp_qloop_57 + tmp_qloop_56*tmp_qloop_59) - (tmp_qloop_1*tmp_qloop_57 + tmp_qloop_47*tmp_qloop_58)*(tmp_qloop_13*tmp_qloop_53 - tmp_qloop_49*tmp_qloop_56))*_data_q_w[q];
+                const real_t tmp_qloop_61 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1];
+                const real_t tmp_qloop_62 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_42;
+                const real_t tmp_qloop_63 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2];
+                const real_t tmp_qloop_64 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_42;
+                const real_t tmp_qloop_65 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3];
+                const real_t tmp_qloop_66 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_42;
+                const real_t tmp_qloop_67 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4];
+                const real_t tmp_qloop_68 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_42;
+                const real_t tmp_qloop_69 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5];
+                const real_t tmp_qloop_70 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_42;
+                const real_t tmp_qloop_71 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1]);
+                const real_t tmp_qloop_72 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_43);
+                const real_t tmp_qloop_73 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2]);
+                const real_t tmp_qloop_74 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_43);
+                const real_t tmp_qloop_75 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3]);
+                const real_t tmp_qloop_76 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_43);
+                const real_t tmp_qloop_77 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4]);
+                const real_t tmp_qloop_78 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_43);
+                const real_t q_tmp_0_0 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q]) + tmp_qloop_45*(tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_42));
+                const real_t q_tmp_0_1 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_61 + tmp_qloop_45*tmp_qloop_62);
+                const real_t q_tmp_0_2 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_63 + tmp_qloop_45*tmp_qloop_64);
+                const real_t q_tmp_0_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_65 + tmp_qloop_45*tmp_qloop_66);
+                const real_t q_tmp_0_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_67 + tmp_qloop_45*tmp_qloop_68);
+                const real_t q_tmp_0_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_69 + tmp_qloop_45*tmp_qloop_70);
+                const real_t q_tmp_1_1 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_61*tmp_qloop_71 + tmp_qloop_62*tmp_qloop_72);
+                const real_t q_tmp_1_2 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_63*tmp_qloop_71 + tmp_qloop_64*tmp_qloop_72);
+                const real_t q_tmp_1_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_65*tmp_qloop_71 + tmp_qloop_66*tmp_qloop_72);
+                const real_t q_tmp_1_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_71 + tmp_qloop_68*tmp_qloop_72);
+                const real_t q_tmp_1_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_71 + tmp_qloop_70*tmp_qloop_72);
+                const real_t q_tmp_2_2 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_63*tmp_qloop_73 + tmp_qloop_64*tmp_qloop_74);
+                const real_t q_tmp_2_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_65*tmp_qloop_73 + tmp_qloop_66*tmp_qloop_74);
+                const real_t q_tmp_2_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_73 + tmp_qloop_68*tmp_qloop_74);
+                const real_t q_tmp_2_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_73 + tmp_qloop_70*tmp_qloop_74);
+                const real_t q_tmp_3_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_66*tmp_qloop_76);
+                const real_t q_tmp_3_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_68*tmp_qloop_76);
+                const real_t q_tmp_3_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_70*tmp_qloop_76);
+                const real_t q_tmp_4_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_68*tmp_qloop_78);
+                const real_t q_tmp_4_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_70*tmp_qloop_78);
+                const real_t q_tmp_5_5 = tmp_qloop_60*(tmp_qloop_0*tmp_qloop_69*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5]) + tmp_qloop_33 + tmp_qloop_44*tmp_qloop_70*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_43));
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_0_1*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_0_2*src_dof_0 + q_acc_1_2*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_0_3*src_dof_0 + q_acc_1_3*src_dof_1 + q_acc_2_3*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_0_4*src_dof_0 + q_acc_1_4*src_dof_1 + q_acc_2_4*src_dof_2 + q_acc_3_4*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_0_5*src_dof_0 + q_acc_1_5*src_dof_1 + q_acc_2_5*src_dof_2 + q_acc_3_5*src_dof_3 + q_acc_4_5*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0e94f7482b8aeb17141ccaead527b2beeaee9f4a
--- /dev/null
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
@@ -0,0 +1,389 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseEpsilonAnnulusMap_1_1.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseEpsilonAnnulusMap_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_2 = -tmp_qloop_1;
+       const real_t tmp_qloop_12 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_13 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_14 = -tmp_qloop_13;
+       const real_t tmp_qloop_15 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_16 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_17 = -tmp_qloop_16*1.0 / (-tmp_qloop_12*tmp_qloop_14 + tmp_qloop_15*tmp_qloop_2);
+       const real_t tmp_qloop_49 = tmp_qloop_16*1.0 / (-tmp_qloop_1*tmp_qloop_15 + tmp_qloop_12*tmp_qloop_13);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_4 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_7 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = mu_dof_0*2.0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*2.0*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_5 = p_affine_0_1 - tmp_qloop_3*_data_q_p_0[q] - tmp_qloop_4*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = p_affine_0_0 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                const real_t tmp_qloop_10 = (tmp_qloop_5*tmp_qloop_5);
+                const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9;
+                const real_t tmp_qloop_18 = pow(tmp_qloop_11, -0.50000000000000000)*tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_18*tmp_qloop_5;
+                const real_t tmp_qloop_20 = pow(tmp_qloop_11, -1.5000000000000000);
+                const real_t tmp_qloop_21 = radRayVertex + tmp_qloop_17*(tmp_qloop_14*(-rayVertex_0 + tmp_qloop_8) - tmp_qloop_2*(-rayVertex_1 + tmp_qloop_5));
+                const real_t tmp_qloop_22 = -tmp_qloop_19*tmp_qloop_2 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_9*1.0;
+                const real_t tmp_qloop_23 = tmp_qloop_18*tmp_qloop_8;
+                const real_t tmp_qloop_24 = tmp_qloop_20*tmp_qloop_21*1.0;
+                const real_t tmp_qloop_25 = tmp_qloop_10*tmp_qloop_24 + tmp_qloop_14*tmp_qloop_23;
+                const real_t tmp_qloop_26 = tmp_qloop_2*tmp_qloop_23 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_5*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_27 = tmp_qloop_14*tmp_qloop_19 - tmp_qloop_24*tmp_qloop_5*tmp_qloop_8;
+                const real_t tmp_qloop_28 = 1.0 / (tmp_qloop_22*tmp_qloop_25 + tmp_qloop_26*tmp_qloop_27);
+                const real_t tmp_qloop_29 = tmp_qloop_28*1.0;
+                const real_t tmp_qloop_30 = tmp_qloop_22*tmp_qloop_29;
+                const real_t tmp_qloop_31 = -tmp_qloop_27;
+                const real_t tmp_qloop_32 = tmp_qloop_29*tmp_qloop_31;
+                const real_t tmp_qloop_33 = tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q]);
+                const real_t tmp_qloop_34 = tmp_qloop_25*tmp_qloop_29;
+                const real_t tmp_qloop_35 = tmp_qloop_26*tmp_qloop_29;
+                const real_t tmp_qloop_36 = tmp_qloop_28*0.5;
+                const real_t tmp_qloop_37 = tmp_qloop_22*tmp_qloop_36;
+                const real_t tmp_qloop_38 = tmp_qloop_31*tmp_qloop_36;
+                const real_t tmp_qloop_39 = tmp_qloop_25*tmp_qloop_36;
+                const real_t tmp_qloop_40 = tmp_qloop_26*tmp_qloop_36;
+                const real_t tmp_qloop_41 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q] + tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q];
+                const real_t tmp_qloop_42 = tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q];
+                const real_t tmp_qloop_43 = tmp_qloop_0*2.0;
+                const real_t tmp_qloop_44 = -p_affine_0_0 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_45 = (tmp_qloop_44*tmp_qloop_44);
+                const real_t tmp_qloop_46 = -p_affine_0_1 + tmp_qloop_3*_data_q_p_0[q] + tmp_qloop_4*_data_q_p_1[q];
+                const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                const real_t tmp_qloop_48 = tmp_qloop_45 + tmp_qloop_47;
+                const real_t tmp_qloop_50 = pow(tmp_qloop_48, -0.50000000000000000)*tmp_qloop_49*1.0;
+                const real_t tmp_qloop_51 = tmp_qloop_44*tmp_qloop_50;
+                const real_t tmp_qloop_52 = tmp_qloop_1*(rayVertex_1 + tmp_qloop_46) - tmp_qloop_13*(rayVertex_0 + tmp_qloop_44);
+                const real_t tmp_qloop_53 = pow(tmp_qloop_48, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_54 = tmp_qloop_53*(radRayVertex + tmp_qloop_49*tmp_qloop_52);
+                const real_t tmp_qloop_55 = tmp_qloop_46*tmp_qloop_50;
+                const real_t tmp_qloop_56 = tmp_qloop_53*(radRayVertex + tmp_qloop_49*tmp_qloop_52);
+                const real_t tmp_qloop_57 = tmp_qloop_44*tmp_qloop_46;
+                const real_t tmp_qloop_58 = abs_det_jac_affine_GRAY*abs((tmp_qloop_1*tmp_qloop_51 - tmp_qloop_56*tmp_qloop_57)*(tmp_qloop_13*tmp_qloop_55 + tmp_qloop_54*tmp_qloop_57) - (tmp_qloop_1*tmp_qloop_55 + tmp_qloop_45*tmp_qloop_56)*(tmp_qloop_13*tmp_qloop_51 - tmp_qloop_47*tmp_qloop_54))*_data_q_w[q];
+                const real_t q_tmp_0_0 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_42));
+                const real_t q_tmp_1_1 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_42));
+                const real_t q_tmp_2_2 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_42));
+                const real_t q_tmp_3_3 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_42));
+                const real_t q_tmp_4_4 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_42));
+                const real_t q_tmp_5_5 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_42));
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatDiag_0 = q_acc_0_0;
+             const real_t elMatDiag_1 = q_acc_1_1;
+             const real_t elMatDiag_2 = q_acc_2_2;
+             const real_t elMatDiag_3 = q_acc_3_3;
+             const real_t elMatDiag_4 = q_acc_4_4;
+             const real_t elMatDiag_5 = q_acc_5_5;
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_4 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_7 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = mu_dof_0*2.0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*2.0*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_5 = p_affine_0_1 - tmp_qloop_3*_data_q_p_0[q] - tmp_qloop_4*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = p_affine_0_0 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                const real_t tmp_qloop_10 = (tmp_qloop_5*tmp_qloop_5);
+                const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9;
+                const real_t tmp_qloop_18 = pow(tmp_qloop_11, -0.50000000000000000)*tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_18*tmp_qloop_5;
+                const real_t tmp_qloop_20 = pow(tmp_qloop_11, -1.5000000000000000);
+                const real_t tmp_qloop_21 = radRayVertex + tmp_qloop_17*(tmp_qloop_14*(-rayVertex_0 + tmp_qloop_8) - tmp_qloop_2*(-rayVertex_1 + tmp_qloop_5));
+                const real_t tmp_qloop_22 = -tmp_qloop_19*tmp_qloop_2 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_9*1.0;
+                const real_t tmp_qloop_23 = tmp_qloop_18*tmp_qloop_8;
+                const real_t tmp_qloop_24 = tmp_qloop_20*tmp_qloop_21*1.0;
+                const real_t tmp_qloop_25 = tmp_qloop_10*tmp_qloop_24 + tmp_qloop_14*tmp_qloop_23;
+                const real_t tmp_qloop_26 = tmp_qloop_2*tmp_qloop_23 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_5*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_27 = tmp_qloop_14*tmp_qloop_19 - tmp_qloop_24*tmp_qloop_5*tmp_qloop_8;
+                const real_t tmp_qloop_28 = 1.0 / (tmp_qloop_22*tmp_qloop_25 + tmp_qloop_26*tmp_qloop_27);
+                const real_t tmp_qloop_29 = tmp_qloop_28*1.0;
+                const real_t tmp_qloop_30 = tmp_qloop_22*tmp_qloop_29;
+                const real_t tmp_qloop_31 = -tmp_qloop_27;
+                const real_t tmp_qloop_32 = tmp_qloop_29*tmp_qloop_31;
+                const real_t tmp_qloop_33 = tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q]);
+                const real_t tmp_qloop_34 = tmp_qloop_25*tmp_qloop_29;
+                const real_t tmp_qloop_35 = tmp_qloop_26*tmp_qloop_29;
+                const real_t tmp_qloop_36 = tmp_qloop_28*0.5;
+                const real_t tmp_qloop_37 = tmp_qloop_22*tmp_qloop_36;
+                const real_t tmp_qloop_38 = tmp_qloop_31*tmp_qloop_36;
+                const real_t tmp_qloop_39 = tmp_qloop_25*tmp_qloop_36;
+                const real_t tmp_qloop_40 = tmp_qloop_26*tmp_qloop_36;
+                const real_t tmp_qloop_41 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q] + tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q];
+                const real_t tmp_qloop_42 = tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q];
+                const real_t tmp_qloop_43 = tmp_qloop_0*2.0;
+                const real_t tmp_qloop_44 = -p_affine_0_0 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_45 = (tmp_qloop_44*tmp_qloop_44);
+                const real_t tmp_qloop_46 = -p_affine_0_1 + tmp_qloop_3*_data_q_p_0[q] + tmp_qloop_4*_data_q_p_1[q];
+                const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                const real_t tmp_qloop_48 = tmp_qloop_45 + tmp_qloop_47;
+                const real_t tmp_qloop_50 = pow(tmp_qloop_48, -0.50000000000000000)*tmp_qloop_49*1.0;
+                const real_t tmp_qloop_51 = tmp_qloop_44*tmp_qloop_50;
+                const real_t tmp_qloop_52 = tmp_qloop_1*(rayVertex_1 + tmp_qloop_46) - tmp_qloop_13*(rayVertex_0 + tmp_qloop_44);
+                const real_t tmp_qloop_53 = pow(tmp_qloop_48, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_54 = tmp_qloop_53*(radRayVertex + tmp_qloop_49*tmp_qloop_52);
+                const real_t tmp_qloop_55 = tmp_qloop_46*tmp_qloop_50;
+                const real_t tmp_qloop_56 = tmp_qloop_53*(radRayVertex + tmp_qloop_49*tmp_qloop_52);
+                const real_t tmp_qloop_57 = tmp_qloop_44*tmp_qloop_46;
+                const real_t tmp_qloop_58 = abs_det_jac_affine_BLUE*abs((tmp_qloop_1*tmp_qloop_51 - tmp_qloop_56*tmp_qloop_57)*(tmp_qloop_13*tmp_qloop_55 + tmp_qloop_54*tmp_qloop_57) - (tmp_qloop_1*tmp_qloop_55 + tmp_qloop_45*tmp_qloop_56)*(tmp_qloop_13*tmp_qloop_51 - tmp_qloop_47*tmp_qloop_54))*_data_q_w[q];
+                const real_t q_tmp_0_0 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_42));
+                const real_t q_tmp_1_1 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_42));
+                const real_t q_tmp_2_2 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_42));
+                const real_t q_tmp_3_3 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_42));
+                const real_t q_tmp_4_4 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_42));
+                const real_t q_tmp_5_5 = tmp_qloop_58*(tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5])*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5]) + tmp_qloop_33 + tmp_qloop_43*(tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5] + tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_41)*(tmp_qloop_37*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5] + tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_42));
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatDiag_0 = q_acc_0_0;
+             const real_t elMatDiag_1 = q_acc_1_1;
+             const real_t elMatDiag_2 = q_acc_2_2;
+             const real_t elMatDiag_3 = q_acc_3_3;
+             const real_t elMatDiag_4 = q_acc_4_4;
+             const real_t elMatDiag_5 = q_acc_5_5;
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..27e30834dd996882ad0f3cda5ff1d30250d52192
--- /dev/null
+++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_macro_2D.cpp
@@ -0,0 +1,689 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseEpsilonAnnulusMap_1_1.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseEpsilonAnnulusMap_1_1::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_1 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_2 = -tmp_qloop_1;
+       const real_t tmp_qloop_12 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_13 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_14 = -tmp_qloop_13;
+       const real_t tmp_qloop_15 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_16 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_17 = -tmp_qloop_16*1.0 / (-tmp_qloop_12*tmp_qloop_14 + tmp_qloop_15*tmp_qloop_2);
+       const real_t tmp_qloop_51 = tmp_qloop_16*1.0 / (-tmp_qloop_1*tmp_qloop_15 + tmp_qloop_12*tmp_qloop_13);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_4 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_7 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = mu_dof_0*2.0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*2.0*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_5 = p_affine_0_1 - tmp_qloop_3*_data_q_p_0[q] - tmp_qloop_4*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = p_affine_0_0 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                const real_t tmp_qloop_10 = (tmp_qloop_5*tmp_qloop_5);
+                const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9;
+                const real_t tmp_qloop_18 = pow(tmp_qloop_11, -0.50000000000000000)*tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_18*tmp_qloop_5;
+                const real_t tmp_qloop_20 = pow(tmp_qloop_11, -1.5000000000000000);
+                const real_t tmp_qloop_21 = radRayVertex + tmp_qloop_17*(tmp_qloop_14*(-rayVertex_0 + tmp_qloop_8) - tmp_qloop_2*(-rayVertex_1 + tmp_qloop_5));
+                const real_t tmp_qloop_22 = -tmp_qloop_19*tmp_qloop_2 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_9*1.0;
+                const real_t tmp_qloop_23 = tmp_qloop_18*tmp_qloop_8;
+                const real_t tmp_qloop_24 = tmp_qloop_20*tmp_qloop_21*1.0;
+                const real_t tmp_qloop_25 = tmp_qloop_10*tmp_qloop_24 + tmp_qloop_14*tmp_qloop_23;
+                const real_t tmp_qloop_26 = tmp_qloop_2*tmp_qloop_23 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_5*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_27 = tmp_qloop_14*tmp_qloop_19 - tmp_qloop_24*tmp_qloop_5*tmp_qloop_8;
+                const real_t tmp_qloop_28 = 1.0 / (tmp_qloop_22*tmp_qloop_25 + tmp_qloop_26*tmp_qloop_27);
+                const real_t tmp_qloop_29 = tmp_qloop_28*1.0;
+                const real_t tmp_qloop_30 = tmp_qloop_22*tmp_qloop_29;
+                const real_t tmp_qloop_31 = -tmp_qloop_27;
+                const real_t tmp_qloop_32 = tmp_qloop_29*tmp_qloop_31;
+                const real_t tmp_qloop_33 = tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q]);
+                const real_t tmp_qloop_34 = tmp_qloop_25*tmp_qloop_29;
+                const real_t tmp_qloop_35 = tmp_qloop_26*tmp_qloop_29;
+                const real_t tmp_qloop_36 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q]);
+                const real_t tmp_qloop_37 = tmp_qloop_28*0.5;
+                const real_t tmp_qloop_38 = tmp_qloop_22*tmp_qloop_37;
+                const real_t tmp_qloop_39 = tmp_qloop_31*tmp_qloop_37;
+                const real_t tmp_qloop_40 = tmp_qloop_25*tmp_qloop_37;
+                const real_t tmp_qloop_41 = tmp_qloop_26*tmp_qloop_37;
+                const real_t tmp_qloop_42 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q];
+                const real_t tmp_qloop_43 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q];
+                const real_t tmp_qloop_44 = tmp_qloop_0*2.0;
+                const real_t tmp_qloop_45 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_43);
+                const real_t tmp_qloop_46 = -p_affine_0_0 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                const real_t tmp_qloop_48 = -p_affine_0_1 + tmp_qloop_3*_data_q_p_0[q] + tmp_qloop_4*_data_q_p_1[q];
+                const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                const real_t tmp_qloop_50 = tmp_qloop_47 + tmp_qloop_49;
+                const real_t tmp_qloop_52 = pow(tmp_qloop_50, -0.50000000000000000)*tmp_qloop_51*1.0;
+                const real_t tmp_qloop_53 = tmp_qloop_46*tmp_qloop_52;
+                const real_t tmp_qloop_54 = tmp_qloop_1*(rayVertex_1 + tmp_qloop_48) - tmp_qloop_13*(rayVertex_0 + tmp_qloop_46);
+                const real_t tmp_qloop_55 = pow(tmp_qloop_50, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_56 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_57 = tmp_qloop_48*tmp_qloop_52;
+                const real_t tmp_qloop_58 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_59 = tmp_qloop_46*tmp_qloop_48;
+                const real_t tmp_qloop_60 = abs_det_jac_affine_GRAY*abs((tmp_qloop_1*tmp_qloop_53 - tmp_qloop_58*tmp_qloop_59)*(tmp_qloop_13*tmp_qloop_57 + tmp_qloop_56*tmp_qloop_59) - (tmp_qloop_1*tmp_qloop_57 + tmp_qloop_47*tmp_qloop_58)*(tmp_qloop_13*tmp_qloop_53 - tmp_qloop_49*tmp_qloop_56))*_data_q_w[q];
+                const real_t tmp_qloop_61 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1];
+                const real_t tmp_qloop_62 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_42;
+                const real_t tmp_qloop_63 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2];
+                const real_t tmp_qloop_64 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_42;
+                const real_t tmp_qloop_65 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3];
+                const real_t tmp_qloop_66 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_42;
+                const real_t tmp_qloop_67 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4];
+                const real_t tmp_qloop_68 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_42;
+                const real_t tmp_qloop_69 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5];
+                const real_t tmp_qloop_70 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_42;
+                const real_t tmp_qloop_71 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1]);
+                const real_t tmp_qloop_72 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_43);
+                const real_t tmp_qloop_73 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2]);
+                const real_t tmp_qloop_74 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_43);
+                const real_t tmp_qloop_75 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3]);
+                const real_t tmp_qloop_76 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_43);
+                const real_t tmp_qloop_77 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4]);
+                const real_t tmp_qloop_78 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_43);
+                const real_t q_tmp_0_0 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q]) + tmp_qloop_45*(tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_42));
+                const real_t q_tmp_0_1 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_61 + tmp_qloop_45*tmp_qloop_62);
+                const real_t q_tmp_0_2 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_63 + tmp_qloop_45*tmp_qloop_64);
+                const real_t q_tmp_0_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_65 + tmp_qloop_45*tmp_qloop_66);
+                const real_t q_tmp_0_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_67 + tmp_qloop_45*tmp_qloop_68);
+                const real_t q_tmp_0_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_69 + tmp_qloop_45*tmp_qloop_70);
+                const real_t q_tmp_1_1 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_61*tmp_qloop_71 + tmp_qloop_62*tmp_qloop_72);
+                const real_t q_tmp_1_2 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_63*tmp_qloop_71 + tmp_qloop_64*tmp_qloop_72);
+                const real_t q_tmp_1_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_65*tmp_qloop_71 + tmp_qloop_66*tmp_qloop_72);
+                const real_t q_tmp_1_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_71 + tmp_qloop_68*tmp_qloop_72);
+                const real_t q_tmp_1_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_71 + tmp_qloop_70*tmp_qloop_72);
+                const real_t q_tmp_2_2 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_63*tmp_qloop_73 + tmp_qloop_64*tmp_qloop_74);
+                const real_t q_tmp_2_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_65*tmp_qloop_73 + tmp_qloop_66*tmp_qloop_74);
+                const real_t q_tmp_2_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_73 + tmp_qloop_68*tmp_qloop_74);
+                const real_t q_tmp_2_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_73 + tmp_qloop_70*tmp_qloop_74);
+                const real_t q_tmp_3_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_66*tmp_qloop_76);
+                const real_t q_tmp_3_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_68*tmp_qloop_76);
+                const real_t q_tmp_3_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_70*tmp_qloop_76);
+                const real_t q_tmp_4_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_68*tmp_qloop_78);
+                const real_t q_tmp_4_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_70*tmp_qloop_78);
+                const real_t q_tmp_5_5 = tmp_qloop_60*(tmp_qloop_0*tmp_qloop_69*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5]) + tmp_qloop_33 + tmp_qloop_44*tmp_qloop_70*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_43));
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_0_1;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_0_2;
+             const real_t elMat_2_1 = q_acc_1_2;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_0_3;
+             const real_t elMat_3_1 = q_acc_1_3;
+             const real_t elMat_3_2 = q_acc_2_3;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_0_4;
+             const real_t elMat_4_1 = q_acc_1_4;
+             const real_t elMat_4_2 = q_acc_2_4;
+             const real_t elMat_4_3 = q_acc_3_4;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_0_5;
+             const real_t elMat_5_1 = q_acc_1_5;
+             const real_t elMat_5_2 = q_acc_2_5;
+             const real_t elMat_5_3 = q_acc_3_5;
+             const real_t elMat_5_4 = q_acc_4_5;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_4 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_7 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_0 = mu_dof_0*2.0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*2.0*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*2.0*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*2.0*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*2.0*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*2.0*_data_phi_0_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_5 = p_affine_0_1 - tmp_qloop_3*_data_q_p_0[q] - tmp_qloop_4*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = p_affine_0_0 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                const real_t tmp_qloop_10 = (tmp_qloop_5*tmp_qloop_5);
+                const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9;
+                const real_t tmp_qloop_18 = pow(tmp_qloop_11, -0.50000000000000000)*tmp_qloop_17*1.0;
+                const real_t tmp_qloop_19 = tmp_qloop_18*tmp_qloop_5;
+                const real_t tmp_qloop_20 = pow(tmp_qloop_11, -1.5000000000000000);
+                const real_t tmp_qloop_21 = radRayVertex + tmp_qloop_17*(tmp_qloop_14*(-rayVertex_0 + tmp_qloop_8) - tmp_qloop_2*(-rayVertex_1 + tmp_qloop_5));
+                const real_t tmp_qloop_22 = -tmp_qloop_19*tmp_qloop_2 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_9*1.0;
+                const real_t tmp_qloop_23 = tmp_qloop_18*tmp_qloop_8;
+                const real_t tmp_qloop_24 = tmp_qloop_20*tmp_qloop_21*1.0;
+                const real_t tmp_qloop_25 = tmp_qloop_10*tmp_qloop_24 + tmp_qloop_14*tmp_qloop_23;
+                const real_t tmp_qloop_26 = tmp_qloop_2*tmp_qloop_23 + tmp_qloop_20*tmp_qloop_21*tmp_qloop_5*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_27 = tmp_qloop_14*tmp_qloop_19 - tmp_qloop_24*tmp_qloop_5*tmp_qloop_8;
+                const real_t tmp_qloop_28 = 1.0 / (tmp_qloop_22*tmp_qloop_25 + tmp_qloop_26*tmp_qloop_27);
+                const real_t tmp_qloop_29 = tmp_qloop_28*1.0;
+                const real_t tmp_qloop_30 = tmp_qloop_22*tmp_qloop_29;
+                const real_t tmp_qloop_31 = -tmp_qloop_27;
+                const real_t tmp_qloop_32 = tmp_qloop_29*tmp_qloop_31;
+                const real_t tmp_qloop_33 = tmp_qloop_0*(tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q])*(tmp_qloop_30*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q] + tmp_qloop_32*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q]);
+                const real_t tmp_qloop_34 = tmp_qloop_25*tmp_qloop_29;
+                const real_t tmp_qloop_35 = tmp_qloop_26*tmp_qloop_29;
+                const real_t tmp_qloop_36 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q]);
+                const real_t tmp_qloop_37 = tmp_qloop_28*0.5;
+                const real_t tmp_qloop_38 = tmp_qloop_22*tmp_qloop_37;
+                const real_t tmp_qloop_39 = tmp_qloop_31*tmp_qloop_37;
+                const real_t tmp_qloop_40 = tmp_qloop_25*tmp_qloop_37;
+                const real_t tmp_qloop_41 = tmp_qloop_26*tmp_qloop_37;
+                const real_t tmp_qloop_42 = tmp_qloop_40*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q];
+                const real_t tmp_qloop_43 = tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q];
+                const real_t tmp_qloop_44 = tmp_qloop_0*2.0;
+                const real_t tmp_qloop_45 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_43);
+                const real_t tmp_qloop_46 = -p_affine_0_0 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                const real_t tmp_qloop_48 = -p_affine_0_1 + tmp_qloop_3*_data_q_p_0[q] + tmp_qloop_4*_data_q_p_1[q];
+                const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                const real_t tmp_qloop_50 = tmp_qloop_47 + tmp_qloop_49;
+                const real_t tmp_qloop_52 = pow(tmp_qloop_50, -0.50000000000000000)*tmp_qloop_51*1.0;
+                const real_t tmp_qloop_53 = tmp_qloop_46*tmp_qloop_52;
+                const real_t tmp_qloop_54 = tmp_qloop_1*(rayVertex_1 + tmp_qloop_48) - tmp_qloop_13*(rayVertex_0 + tmp_qloop_46);
+                const real_t tmp_qloop_55 = pow(tmp_qloop_50, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_56 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_57 = tmp_qloop_48*tmp_qloop_52;
+                const real_t tmp_qloop_58 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_59 = tmp_qloop_46*tmp_qloop_48;
+                const real_t tmp_qloop_60 = abs_det_jac_affine_BLUE*abs((tmp_qloop_1*tmp_qloop_53 - tmp_qloop_58*tmp_qloop_59)*(tmp_qloop_13*tmp_qloop_57 + tmp_qloop_56*tmp_qloop_59) - (tmp_qloop_1*tmp_qloop_57 + tmp_qloop_47*tmp_qloop_58)*(tmp_qloop_13*tmp_qloop_53 - tmp_qloop_49*tmp_qloop_56))*_data_q_w[q];
+                const real_t tmp_qloop_61 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1];
+                const real_t tmp_qloop_62 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_42;
+                const real_t tmp_qloop_63 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2];
+                const real_t tmp_qloop_64 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_42;
+                const real_t tmp_qloop_65 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3];
+                const real_t tmp_qloop_66 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_42;
+                const real_t tmp_qloop_67 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4];
+                const real_t tmp_qloop_68 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_42;
+                const real_t tmp_qloop_69 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5];
+                const real_t tmp_qloop_70 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_42;
+                const real_t tmp_qloop_71 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1]);
+                const real_t tmp_qloop_72 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_43);
+                const real_t tmp_qloop_73 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2]);
+                const real_t tmp_qloop_74 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_43);
+                const real_t tmp_qloop_75 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3]);
+                const real_t tmp_qloop_76 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_43);
+                const real_t tmp_qloop_77 = tmp_qloop_0*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4]);
+                const real_t tmp_qloop_78 = tmp_qloop_44*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_43);
+                const real_t q_tmp_0_0 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*(tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q]) + tmp_qloop_45*(tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_42));
+                const real_t q_tmp_0_1 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_61 + tmp_qloop_45*tmp_qloop_62);
+                const real_t q_tmp_0_2 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_63 + tmp_qloop_45*tmp_qloop_64);
+                const real_t q_tmp_0_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_65 + tmp_qloop_45*tmp_qloop_66);
+                const real_t q_tmp_0_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_67 + tmp_qloop_45*tmp_qloop_68);
+                const real_t q_tmp_0_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_36*tmp_qloop_69 + tmp_qloop_45*tmp_qloop_70);
+                const real_t q_tmp_1_1 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_61*tmp_qloop_71 + tmp_qloop_62*tmp_qloop_72);
+                const real_t q_tmp_1_2 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_63*tmp_qloop_71 + tmp_qloop_64*tmp_qloop_72);
+                const real_t q_tmp_1_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_65*tmp_qloop_71 + tmp_qloop_66*tmp_qloop_72);
+                const real_t q_tmp_1_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_71 + tmp_qloop_68*tmp_qloop_72);
+                const real_t q_tmp_1_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_71 + tmp_qloop_70*tmp_qloop_72);
+                const real_t q_tmp_2_2 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_63*tmp_qloop_73 + tmp_qloop_64*tmp_qloop_74);
+                const real_t q_tmp_2_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_65*tmp_qloop_73 + tmp_qloop_66*tmp_qloop_74);
+                const real_t q_tmp_2_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_73 + tmp_qloop_68*tmp_qloop_74);
+                const real_t q_tmp_2_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_73 + tmp_qloop_70*tmp_qloop_74);
+                const real_t q_tmp_3_3 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_65*tmp_qloop_75 + tmp_qloop_66*tmp_qloop_76);
+                const real_t q_tmp_3_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_75 + tmp_qloop_68*tmp_qloop_76);
+                const real_t q_tmp_3_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_75 + tmp_qloop_70*tmp_qloop_76);
+                const real_t q_tmp_4_4 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_67*tmp_qloop_77 + tmp_qloop_68*tmp_qloop_78);
+                const real_t q_tmp_4_5 = tmp_qloop_60*(tmp_qloop_33 + tmp_qloop_69*tmp_qloop_77 + tmp_qloop_70*tmp_qloop_78);
+                const real_t q_tmp_5_5 = tmp_qloop_60*(tmp_qloop_0*tmp_qloop_69*(tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5]) + tmp_qloop_33 + tmp_qloop_44*tmp_qloop_70*(tmp_qloop_38*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_43));
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_0_1;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_0_2;
+             const real_t elMat_2_1 = q_acc_1_2;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_0_3;
+             const real_t elMat_3_1 = q_acc_1_3;
+             const real_t elMat_3_2 = q_acc_2_3;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_0_4;
+             const real_t elMat_4_1 = q_acc_1_4;
+             const real_t elMat_4_2 = q_acc_2_4;
+             const real_t elMat_4_3 = q_acc_3_4;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_0_5;
+             const real_t elMat_5_1 = q_acc_1_5;
+             const real_t elMat_5_2 = q_acc_2_5;
+             const real_t elMat_5_3 = q_acc_3_5;
+             const real_t elMat_5_4 = q_acc_4_5;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/CMakeLists.txt b/operators/full_stokes/CMakeLists.txt
index d01c2a478ea8a0af31ccccf6a76b7e531a0ac383..cd9d5f1b39da4c7f2d472e1274cd61e7203f952a 100644
--- a/operators/full_stokes/CMakeLists.txt
+++ b/operators/full_stokes/CMakeLists.txt
@@ -1,5 +1,13 @@
 add_library( opgen-full_stokes
 
+   P2ElementwiseFullStokesAnnulusMap_0_0.cpp
+   P2ElementwiseFullStokesAnnulusMap_0_0.hpp
+   P2ElementwiseFullStokesAnnulusMap_0_1.cpp
+   P2ElementwiseFullStokesAnnulusMap_0_1.hpp
+   P2ElementwiseFullStokesAnnulusMap_1_0.cpp
+   P2ElementwiseFullStokesAnnulusMap_1_0.hpp
+   P2ElementwiseFullStokesAnnulusMap_1_1.cpp
+   P2ElementwiseFullStokesAnnulusMap_1_1.hpp
    P2ElementwiseFullStokesIcosahedralShellMap_0_0.cpp
    P2ElementwiseFullStokesIcosahedralShellMap_0_0.hpp
    P2ElementwiseFullStokesIcosahedralShellMap_0_1.cpp
@@ -41,6 +49,12 @@ add_library( opgen-full_stokes
 if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
    target_sources(opgen-full_stokes PRIVATE
 
+      avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp
+      avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
+      avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp
+      avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp
+      avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp
+      avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
       avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp
       avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
       avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp
@@ -71,6 +85,10 @@ if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
       avx/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp
       avx/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp
       avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_macro_2D.cpp
       noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
       noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp
       noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp
@@ -97,6 +115,12 @@ if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
 
    set_source_files_properties(
 
+      avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp
+      avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
+      avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp
+      avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp
+      avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp
+      avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
       avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp
       avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
       avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp
@@ -137,6 +161,16 @@ else()
 
    target_sources(opgen-full_stokes PRIVATE
 
+      noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
+      noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_macro_2D.cpp
       noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp
       noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp
       noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.cpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0ef0a65beeb909a73019e2f702f49193a5b42494
--- /dev/null
+++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.cpp
@@ -0,0 +1,399 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (maybe
+// (related to) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) causing a
+// warning in an internal standard library header (bits/stl_algobase.h). As a
+// workaround, we disable the warning and include this header indirectly through
+// a public header.
+#include <waLBerlaDefinitions.h>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnonnull"
+#endif
+#include <cmath>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#include "P2ElementwiseFullStokesAnnulusMap_0_0.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+P2ElementwiseFullStokesAnnulusMap_0_0::P2ElementwiseFullStokesAnnulusMap_0_0( const std::shared_ptr< PrimitiveStorage >& storage,
+                                                                              size_t                                     minLevel,
+                                                                              size_t                                     maxLevel,
+                                                                              const P2Function< real_t >&                _mu )
+: Operator( storage, minLevel, maxLevel ) // forward the storage and the level range to the Operator base class
+, mu( _mu ) // copy-construct the coefficient member; its face data is handed to every kernel in this file
+{} // invDiag_ is not initialised here and therefore starts out as an empty shared_ptr
+
+void P2ElementwiseFullStokesAnnulusMap_0_0::apply( const P2Function< real_t >& src,
+                                                   const P2Function< real_t >& dst,
+                                                   uint_t                      level,
+                                                   DoFType                     flag,
+                                                   UpdateType                  updateType ) const
+{ // Runs apply_macro_2D on every macro-face of the storage, then additively communicates dst to edges and vertices.
+   this->startTiming( "apply" );
+
+   // Make sure that the halos of src and mu are up-to-date before the kernel reads them
+   this->timingTree_->start( "pre-communication" );
+   if ( this->storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." ); // storages with global cells are not supported by this operator
+   }
+   else
+   {
+      communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( mu, level, communication::syncDirection_t::LOW2HIGH );
+   }
+   this->timingTree_->stop( "pre-communication" );
+
+   if ( updateType == Replace )
+   {
+      // We need to zero the destination array (including halos).
+      // However, we must not zero out anything that is not flagged with the specified BCs.
+      // Therefore, we first zero out everything that is flagged, and then, later,
+      // the halos of the highest dim primitives.
+      dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." ); // same restriction as above: no kernel for global cells
+   }
+   else
+   {
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data in the functions
+         real_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muVertex  = face.getData( mu.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muEdge    = face.getData( mu.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         // Zero out dst halos only
+         //
+         // This is also necessary when using update type == Add.
+         // During additive comm we then skip zeroing the data on the lower-dim primitives.
+         for ( const auto& idx : vertexdof::macroface::Iterator( level ) )
+         {
+            if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) )
+            {
+               auto arrayIdx             = vertexdof::macroface::index( level, idx.x(), idx.y() );
+               _data_dstVertex[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+            }
+         }
+         for ( const auto& idx : edgedof::macroface::Iterator( level ) )
+         {
+            for ( const auto& orientation : edgedof::faceLocalEdgeDoFOrientations )
+            {
+               if ( !edgedof::macroface::isInnerEdgeDoF( level, idx, orientation ) )
+               {
+                  auto arrayIdx           = edgedof::macroface::index( level, idx.x(), idx.y(), orientation );
+                  _data_dstEdge[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+               }
+            }
+         }
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         WALBERLA_CHECK_NOT_NULLPTR(
+             std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ),
+             "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." ) // NOTE(review): text says "macro-cell" but the check is per macro-face
+         real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex();
+         real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex();
+         real_t refVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0];
+         real_t rayVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0];
+         real_t thrVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0];
+         real_t refVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1];
+         real_t rayVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1];
+         real_t thrVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+
+         apply_macro_2D(
+
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_muEdge,
+             _data_muVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+
+      // Push result to lower-dimensional primitives
+      // (additive communication from the faces to their edges and vertices).
+      this->timingTree_->start( "post-communication" );
+      // Note: We could avoid communication here by implementing the apply() also for the respective
+      //       lower dimensional primitives!
+      dst.getVertexDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getVertexDoFFunction().communicateAdditively< Face, Vertex >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getEdgeDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      this->timingTree_->stop( "post-communication" );
+   }
+
+   this->stopTiming( "apply" );
+}
+void P2ElementwiseFullStokesAnnulusMap_0_0::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                                                      const P2Function< idx_t >&                  src,
+                                                      const P2Function< idx_t >&                  dst,
+                                                      uint_t                                      level,
+                                                      DoFType                                     flag ) const
+{ // Runs toMatrix_macro_2D on every macro-face, handing it mat plus the idx_t face data of src and dst.
+   this->startTiming( "toMatrix" );
+
+   // The provided flag is currently ignored; a warning is logged whenever it is not All.
+   if ( flag != All )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      this->timingTree_->start( "pre-communication" );
+      mu.communicate< Face, Cell >( level );
+      mu.communicate< Edge, Cell >( level );
+      mu.communicate< Vertex, Cell >( level );
+      this->timingTree_->stop( "pre-communication" );
+
+      WALBERLA_ABORT( "Not implemented." ); // storages with global cells are not supported by this operator
+   }
+   else
+   {
+      this->timingTree_->start( "pre-communication" );
+      communication::syncFunctionBetweenPrimitives( mu, level, communication::syncDirection_t::LOW2HIGH );
+      this->timingTree_->stop( "pre-communication" );
+
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the raw face data: idx_t arrays for src/dst, real_t arrays for mu
+         idx_t*  _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muVertex  = face.getData( mu.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muEdge    = face.getData( mu.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         WALBERLA_CHECK_NOT_NULLPTR(
+             std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ),
+             "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." ) // NOTE(review): text says "macro-cell" but the check is per macro-face
+         real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex();
+         real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex();
+         real_t refVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0];
+         real_t rayVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0];
+         real_t thrVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0];
+         real_t refVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1];
+         real_t rayVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1];
+         real_t thrVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+
+         toMatrix_macro_2D(
+
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_muEdge,
+             _data_muVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             mat,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+   }
+   this->stopTiming( "toMatrix" );
+}
+void P2ElementwiseFullStokesAnnulusMap_0_0::computeInverseDiagonalOperatorValues()
+{ // Fills invDiag_ on every level in [minLevel_, maxLevel_] via the per-face kernel, then inverts it elementwise.
+   this->startTiming( "computeInverseDiagonalOperatorValues" );
+
+   if ( invDiag_ == nullptr ) // allocate the result function only on first use
+   {
+      invDiag_ = std::make_shared< P2Function< real_t > >( "inverse diagonal entries", storage_, minLevel_, maxLevel_ );
+   }
+
+   for ( uint_t level = minLevel_; level <= maxLevel_; level++ )
+   {
+      invDiag_->setToZero( level ); // start from zero on this level before the kernel and the additive communication run
+
+      if ( storage_->hasGlobalCells() )
+      {
+         this->timingTree_->start( "pre-communication" );
+         mu.communicate< Face, Cell >( level );
+         mu.communicate< Edge, Cell >( level );
+         mu.communicate< Vertex, Cell >( level );
+         this->timingTree_->stop( "pre-communication" );
+
+         WALBERLA_ABORT( "Not implemented." ); // storages with global cells are not supported by this operator
+      }
+      else
+      {
+         this->timingTree_->start( "pre-communication" );
+         communication::syncFunctionBetweenPrimitives( mu, level, communication::syncDirection_t::LOW2HIGH );
+         this->timingTree_->stop( "pre-communication" );
+
+         for ( auto& it : storage_->getFaces() )
+         {
+            Face& face = *it.second;
+
+            // get hold of the raw face data of invDiag_ (output) and mu (coefficient)
+            real_t* _data_invDiag_Vertex =
+                face.getData( ( *invDiag_ ).getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_invDiag_Edge = face.getData( ( *invDiag_ ).getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_muVertex     = face.getData( mu.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_muEdge       = face.getData( mu.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+            const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+            const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+            const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+            const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+            const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+            const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+            const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+            const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+            WALBERLA_CHECK_NOT_NULLPTR(
+                std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ),
+                "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." ) // NOTE(review): text says "macro-cell" but the check is per macro-face
+            real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex();
+            real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex();
+            real_t refVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0];
+            real_t rayVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0];
+            real_t thrVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0];
+            real_t refVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1];
+            real_t rayVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1];
+            real_t thrVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1];
+
+            this->timingTree_->start( "kernel" );
+
+            computeInverseDiagonalOperatorValues_macro_2D(
+
+                _data_invDiag_Edge,
+                _data_invDiag_Vertex,
+                _data_muEdge,
+                _data_muVertex,
+                macro_vertex_coord_id_0comp0,
+                macro_vertex_coord_id_0comp1,
+                macro_vertex_coord_id_1comp0,
+                macro_vertex_coord_id_1comp1,
+                macro_vertex_coord_id_2comp0,
+                macro_vertex_coord_id_2comp1,
+                micro_edges_per_macro_edge,
+                micro_edges_per_macro_edge_float,
+                radRayVertex,
+                radRefVertex,
+                rayVertex_0,
+                rayVertex_1,
+                refVertex_0,
+                refVertex_1,
+                thrVertex_0,
+                thrVertex_1 );
+            this->timingTree_->stop( "kernel" );
+         }
+
+         // Push result to lower-dimensional primitives
+         // (additive communication from the faces to their edges and vertices).
+         this->timingTree_->start( "post-communication" );
+         // Note: We could avoid communication here by implementing this kernel also for the respective
+         //       lower dimensional primitives!
+         ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Edge >( level );
+         ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Vertex >( level );
+         ( *invDiag_ ).getEdgeDoFFunction().communicateAdditively< Face, Edge >( level );
+         this->timingTree_->stop( "post-communication" );
+      }
+
+      ( *invDiag_ ).invertElementwise( level ); // presumably turns the assembled diagonal into its inverse -- confirm in P2Function
+   }
+
+   this->stopTiming( "computeInverseDiagonalOperatorValues" );
+}
+std::shared_ptr< P2Function< real_t > > P2ElementwiseFullStokesAnnulusMap_0_0::getInverseDiagonalValues() const
+{ // Plain accessor: hands out the shared invDiag_ pointer without computing anything.
+   return invDiag_; // empty until computeInverseDiagonalOperatorValues() has allocated it
+}
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.hpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..4549ed15f0772aeb5b6122d4bc26d4c9c4761a88
--- /dev/null
+++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.hpp
@@ -0,0 +1,192 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+#include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/communication/Syncing.hpp"
+#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
+#include "hyteg/geometry/AnnulusMap.hpp"
+#include "hyteg/operators/Operator.hpp"
+#include "hyteg/p2functionspace/P2Function.hpp"
+#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
+#include "hyteg/solvers/Smoothables.hpp"
+#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+/// Implements the fully coupled viscous operator of the Stokes problem.
+/// The latter is the extension of the Epsilon operator to the case where
+/// the velocity field need not be divergence-free. This is e.g. the case
+/// in the (truncated) anelastic liquid approximation of mantle convection.
+///
+/// The strong representation of the operator is given by:
+///
+///    - div[ μ (grad(u)+grad(u)ᵀ) ] + 2/3 grad[ μ div(u) ]
+///
+/// Note that the factor 2/3 means that for 2D this is the pseudo-3D form
+/// of the operator.
+///
+/// Component trial: 0
+/// Component test:  0
+/// Geometry map:    AnnulusMap
+///
+/// Weak formulation
+///
+///     u: trial function (vectorial space: Lagrange, degree: 2)
+///     v: test function  (vectorial space: Lagrange, degree: 2)
+///     μ: coefficient    (scalar space:    Lagrange, degree: 2)
+///
+///     ∫ μ { ( 2 ε(u) : ε(v) ) - (2/3) [ ( ∇ · u ) · ( ∇ · v ) ] }
+///
+/// where
+///
+///     ε(w) := (1/2) (∇w + (∇w)ᵀ)
+
+class P2ElementwiseFullStokesAnnulusMap_0_0 : public Operator< P2Function< real_t >, P2Function< real_t > >,
+                                              public OperatorWithInverseDiagonal< P2Function< real_t > >
+{
+ public:
+   P2ElementwiseFullStokesAnnulusMap_0_0( const std::shared_ptr< PrimitiveStorage >& storage,
+                                          size_t                                     minLevel,
+                                          size_t                                     maxLevel,
+                                          const P2Function< real_t >&                _mu ); // _mu: coefficient function, stored in the member mu
+
+   void apply( const P2Function< real_t >& src,
+               const P2Function< real_t >& dst,
+               uint_t                      level,
+               DoFType                     flag,
+               UpdateType                  updateType = Replace ) const; // wrapper around apply_macro_2D
+
+   void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                  const P2Function< idx_t >&                  src,
+                  const P2Function< idx_t >&                  dst,
+                  uint_t                                      level,
+                  DoFType                                     flag ) const; // wrapper around toMatrix_macro_2D
+
+   void computeInverseDiagonalOperatorValues(); // wrapper around computeInverseDiagonalOperatorValues_macro_2D; fills invDiag_
+
+   std::shared_ptr< P2Function< real_t > > getInverseDiagonalValues() const; // accessor for invDiag_
+
+ protected: // no protected members
+ private:
+   /// Kernel type: apply
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    792    1096      24      16      4              0                 0              1
+   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
+                        real_t* RESTRICT _data_dstVertex,
+                        real_t* RESTRICT _data_muEdge,
+                        real_t* RESTRICT _data_muVertex,
+                        real_t* RESTRICT _data_srcEdge,
+                        real_t* RESTRICT _data_srcVertex,
+                        real_t           macro_vertex_coord_id_0comp0,
+                        real_t           macro_vertex_coord_id_0comp1,
+                        real_t           macro_vertex_coord_id_1comp0,
+                        real_t           macro_vertex_coord_id_1comp1,
+                        real_t           macro_vertex_coord_id_2comp0,
+                        real_t           macro_vertex_coord_id_2comp1,
+                        int64_t          micro_edges_per_macro_edge,
+                        real_t           micro_edges_per_macro_edge_float,
+                        real_t           radRayVertex,
+                        real_t           radRefVertex,
+                        real_t           rayVertex_0,
+                        real_t           rayVertex_1,
+                        real_t           refVertex_0,
+                        real_t           refVertex_1,
+                        real_t           thrVertex_0,
+                        real_t           thrVertex_1 ) const;
+   /// Kernel type: toMatrix
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    756    1060      24      16      4              0                 0              4
+   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                           idx_t* RESTRICT                      _data_dstVertex,
+                           real_t* RESTRICT                     _data_muEdge,
+                           real_t* RESTRICT                     _data_muVertex,
+                           idx_t* RESTRICT                      _data_srcEdge,
+                           idx_t* RESTRICT                      _data_srcVertex,
+                           real_t                               macro_vertex_coord_id_0comp0,
+                           real_t                               macro_vertex_coord_id_0comp1,
+                           real_t                               macro_vertex_coord_id_1comp0,
+                           real_t                               macro_vertex_coord_id_1comp1,
+                           real_t                               macro_vertex_coord_id_2comp0,
+                           real_t                               macro_vertex_coord_id_2comp1,
+                           std::shared_ptr< SparseMatrixProxy > mat,
+                           int64_t                              micro_edges_per_macro_edge,
+                           real_t                               micro_edges_per_macro_edge_float,
+                           real_t                               radRayVertex,
+                           real_t                               radRefVertex,
+                           real_t                               rayVertex_0,
+                           real_t                               rayVertex_1,
+                           real_t                               refVertex_0,
+                           real_t                               refVertex_1,
+                           real_t                               thrVertex_0,
+                           real_t                               thrVertex_1 ) const;
+   /// Kernel type: computeInverseDiagonalOperatorValues
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    522     760      24      16      4              0                 0              1
+   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
+                                                       real_t* RESTRICT _data_invDiag_Vertex,
+                                                       real_t* RESTRICT _data_muEdge,
+                                                       real_t* RESTRICT _data_muVertex,
+                                                       real_t           macro_vertex_coord_id_0comp0,
+                                                       real_t           macro_vertex_coord_id_0comp1,
+                                                       real_t           macro_vertex_coord_id_1comp0,
+                                                       real_t           macro_vertex_coord_id_1comp1,
+                                                       real_t           macro_vertex_coord_id_2comp0,
+                                                       real_t           macro_vertex_coord_id_2comp1,
+                                                       int64_t          micro_edges_per_macro_edge,
+                                                       real_t           micro_edges_per_macro_edge_float,
+                                                       real_t           radRayVertex,
+                                                       real_t           radRefVertex,
+                                                       real_t           rayVertex_0,
+                                                       real_t           rayVertex_1,
+                                                       real_t           refVertex_0,
+                                                       real_t           refVertex_1,
+                                                       real_t           thrVertex_0,
+                                                       real_t           thrVertex_1 ) const;
+
+   std::shared_ptr< P2Function< real_t > > invDiag_; // result of computeInverseDiagonalOperatorValues(); empty until then
+   P2Function< real_t >                    mu; // coefficient μ of the weak form, stored by value
+};
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.cpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bdcc4edeb8de36ea60ec56a1fa0973e5c85f7d1b
--- /dev/null
+++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.cpp
@@ -0,0 +1,292 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (maybe
+// (related to) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) causing a
+// warning in an internal standard library header (bits/stl_algobase.h). As a
+// workaround, we disable the warning and include this header indirectly through
+// a public header.
+#include <waLBerlaDefinitions.h>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnonnull"
+#endif
+#include <cmath>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#include "P2ElementwiseFullStokesAnnulusMap_0_1.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+P2ElementwiseFullStokesAnnulusMap_0_1::P2ElementwiseFullStokesAnnulusMap_0_1( const std::shared_ptr< PrimitiveStorage >& storage,
+                                                                              size_t                                     minLevel,
+                                                                              size_t                                     maxLevel,
+                                                                              const P2Function< real_t >&                _mu )
+: Operator( storage, minLevel, maxLevel )
+, mu( _mu ) // copy-constructs the by-value coefficient member mu from the caller's function
+{}
+
+void P2ElementwiseFullStokesAnnulusMap_0_1::apply( const P2Function< real_t >& src,
+                                                   const P2Function< real_t >& dst,
+                                                   uint_t                      level,
+                                                   DoFType                     flag,
+                                                   UpdateType                  updateType ) const
+{ // Steps: sync halos, zero dst where required, run the kernel per macro-face, add results onto edges/vertices.
+   this->startTiming( "apply" );
+
+   // Make sure that the halos of src and of the coefficient mu are up-to-date
+   this->timingTree_->start( "pre-communication" );
+   if ( this->storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." ); // storages with global cells are not supported by this operator
+   }
+   else
+   {
+      communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( mu, level, communication::syncDirection_t::LOW2HIGH );
+   }
+   this->timingTree_->stop( "pre-communication" );
+
+   if ( updateType == Replace )
+   {
+      // We need to zero the destination array (including halos).
+      // However, we must not zero out anything that is not flagged with the specified BCs.
+      // Therefore, we first zero out everything that is flagged, and then, later,
+      // the halos of the highest-dimensional primitives.
+      dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data in the functions
+         real_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muVertex  = face.getData( mu.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muEdge    = face.getData( mu.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         // Zero out dst halos only, i.e. vertex DoFs on the face boundary and all non-inner edge DoFs
+         //
+         // This is also necessary when using update type == Add.
+         // During additive comm we then skip zeroing the data on the lower-dim primitives.
+         for ( const auto& idx : vertexdof::macroface::Iterator( level ) )
+         {
+            if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) )
+            {
+               auto arrayIdx             = vertexdof::macroface::index( level, idx.x(), idx.y() );
+               _data_dstVertex[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+            }
+         }
+         for ( const auto& idx : edgedof::macroface::Iterator( level ) )
+         {
+            for ( const auto& orientation : edgedof::faceLocalEdgeDoFOrientations )
+            {
+               if ( !edgedof::macroface::isInnerEdgeDoF( level, idx, orientation ) )
+               {
+                  auto arrayIdx           = edgedof::macroface::index( level, idx.x(), idx.y(), orientation );
+                  _data_dstEdge[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+               }
+            }
+         }
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         WALBERLA_CHECK_NOT_NULLPTR( // NOTE(review): checked per macro-face, although the message below says "macro-cell"
+             std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ),
+             "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex();
+         real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex();
+         real_t refVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0];
+         real_t rayVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0];
+         real_t thrVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0];
+         real_t refVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1];
+         real_t rayVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1];
+         real_t thrVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+
+         apply_macro_2D(
+
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_muEdge,
+             _data_muVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+
+      // Push result to lower-dimensional primitives
+      // (additive communication from the faces to their edges and vertices)
+      this->timingTree_->start( "post-communication" );
+      // Note: We could avoid communication here by implementing the apply() also for the respective
+      //       lower dimensional primitives!
+      dst.getVertexDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getVertexDoFFunction().communicateAdditively< Face, Vertex >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getEdgeDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      this->timingTree_->stop( "post-communication" );
+   }
+
+   this->stopTiming( "apply" );
+}
+void P2ElementwiseFullStokesAnnulusMap_0_1::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                                                      const P2Function< idx_t >&                  src,
+                                                      const P2Function< idx_t >&                  dst,
+                                                      uint_t                                      level,
+                                                      DoFType                                     flag ) const
+{ // Per macro-face, hands mat, the src/dst index data and the mu data to toMatrix_macro_2D.
+   this->startTiming( "toMatrix" );
+
+   // We currently ignore the flag provided!
+   if ( flag != All )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      this->timingTree_->start( "pre-communication" );
+      mu.communicate< Face, Cell >( level );
+      mu.communicate< Edge, Cell >( level );
+      mu.communicate< Vertex, Cell >( level );
+      this->timingTree_->stop( "pre-communication" );
+
+      WALBERLA_ABORT( "Not implemented." ); // this branch always aborts: storages with global cells are not supported
+   }
+   else
+   {
+      this->timingTree_->start( "pre-communication" );
+      communication::syncFunctionBetweenPrimitives( mu, level, communication::syncDirection_t::LOW2HIGH );
+      this->timingTree_->stop( "pre-communication" );
+
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the face-local data: idx_t arrays of src/dst and the real_t coefficient data of mu
+         idx_t*  _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muVertex  = face.getData( mu.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muEdge    = face.getData( mu.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         WALBERLA_CHECK_NOT_NULLPTR( // NOTE(review): checked per macro-face, although the message below says "macro-cell"
+             std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ),
+             "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex();
+         real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex();
+         real_t refVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0];
+         real_t rayVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0];
+         real_t thrVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0];
+         real_t refVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1];
+         real_t rayVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1];
+         real_t thrVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+
+         toMatrix_macro_2D(
+
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_muEdge,
+             _data_muVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             mat,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+   }
+   this->stopTiming( "toMatrix" );
+}
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.hpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..faa2216105c45c5cee3af2442368e07562eeaa4d
--- /dev/null
+++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.hpp
@@ -0,0 +1,159 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+#include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/communication/Syncing.hpp"
+#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
+#include "hyteg/geometry/AnnulusMap.hpp"
+#include "hyteg/operators/Operator.hpp"
+#include "hyteg/p2functionspace/P2Function.hpp"
+#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
+#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+/// Implements the fully coupled viscous operator of the Stokes problem.
+/// The latter is the extension of the Epsilon operator to the case where
+/// the velocity field need not be divergence-free. This is e.g. the case
+/// in the (truncated) anelastic liquid approximation of mantle convection.
+///
+/// The strong representation of the operator is given by:
+///
+///    - div[ μ (grad(u)+grad(u)ᵀ) ] + 2/3 grad[ μ div(u) ]
+///
+/// Note that the factor 2/3 means that for 2D this is the pseudo-3D form
+/// of the operator.
+///
+/// Component trial: 1
+/// Component test:  0
+/// Geometry map:    AnnulusMap
+///
+/// Weak formulation
+///
+///     u: trial function (vectorial space: Lagrange, degree: 2)
+///     v: test function  (vectorial space: Lagrange, degree: 2)
+///     μ: coefficient    (scalar space:    Lagrange, degree: 2)
+///
+///     ∫ μ { ( 2 ε(u) : ε(v) ) - (2/3) [ ( ∇ · u ) · ( ∇ · v ) ] }
+///
+/// where
+///
+///     ε(w) := (1/2) (∇w + (∇w)ᵀ)
+
+class P2ElementwiseFullStokesAnnulusMap_0_1 : public Operator< P2Function< real_t >, P2Function< real_t > >
+{
+ public:
+   P2ElementwiseFullStokesAnnulusMap_0_1( const std::shared_ptr< PrimitiveStorage >& storage,
+                                          size_t                                     minLevel,
+                                          size_t                                     maxLevel,
+                                          const P2Function< real_t >&                _mu ); // _mu is copied into the member mu
+
+   void apply( const P2Function< real_t >& src,
+               const P2Function< real_t >& dst,
+               uint_t                      level,
+               DoFType                     flag,
+               UpdateType                  updateType = Replace ) const; // Replace: flagged dst DoFs are zeroed first
+
+   void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                  const P2Function< idx_t >&                  src,
+                  const P2Function< idx_t >&                  dst,
+                  uint_t                                      level,
+                  DoFType                                     flag ) const; // flag is ignored; a warning is logged if flag != All
+
+ protected:
+ private:
+   /// Kernel type: apply
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///   1032    1244      24      16      4              0                 0              1
+   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
+                        real_t* RESTRICT _data_dstVertex,
+                        real_t* RESTRICT _data_muEdge,
+                        real_t* RESTRICT _data_muVertex,
+                        real_t* RESTRICT _data_srcEdge,
+                        real_t* RESTRICT _data_srcVertex,
+                        real_t           macro_vertex_coord_id_0comp0,
+                        real_t           macro_vertex_coord_id_0comp1,
+                        real_t           macro_vertex_coord_id_1comp0,
+                        real_t           macro_vertex_coord_id_1comp1,
+                        real_t           macro_vertex_coord_id_2comp0,
+                        real_t           macro_vertex_coord_id_2comp1,
+                        int64_t          micro_edges_per_macro_edge,
+                        real_t           micro_edges_per_macro_edge_float,
+                        real_t           radRayVertex,
+                        real_t           radRefVertex,
+                        real_t           rayVertex_0,
+                        real_t           rayVertex_1,
+                        real_t           refVertex_0,
+                        real_t           refVertex_1,
+                        real_t           thrVertex_0,
+                        real_t           thrVertex_1 ) const;
+   /// Kernel type: toMatrix
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    996    1208      24      16      4              0                 0              4
+   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                           idx_t* RESTRICT                      _data_dstVertex,
+                           real_t* RESTRICT                     _data_muEdge,
+                           real_t* RESTRICT                     _data_muVertex,
+                           idx_t* RESTRICT                      _data_srcEdge,
+                           idx_t* RESTRICT                      _data_srcVertex,
+                           real_t                               macro_vertex_coord_id_0comp0,
+                           real_t                               macro_vertex_coord_id_0comp1,
+                           real_t                               macro_vertex_coord_id_1comp0,
+                           real_t                               macro_vertex_coord_id_1comp1,
+                           real_t                               macro_vertex_coord_id_2comp0,
+                           real_t                               macro_vertex_coord_id_2comp1,
+                           std::shared_ptr< SparseMatrixProxy > mat,
+                           int64_t                              micro_edges_per_macro_edge,
+                           real_t                               micro_edges_per_macro_edge_float,
+                           real_t                               radRayVertex,
+                           real_t                               radRefVertex,
+                           real_t                               rayVertex_0,
+                           real_t                               rayVertex_1,
+                           real_t                               refVertex_0,
+                           real_t                               refVertex_1,
+                           real_t                               thrVertex_0,
+                           real_t                               thrVertex_1 ) const;
+
+   P2Function< real_t > mu; // coefficient function handed to the kernels as _data_muVertex / _data_muEdge
+};
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.cpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e8b8872af11a8e2fc3b3c9a6383b5e477d607267
--- /dev/null
+++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.cpp
@@ -0,0 +1,292 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (maybe
+// (related to) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) causing a
+// warning in an internal standard library header (bits/stl_algobase.h). As a
+// workaround, we disable the warning and include this header indirectly through
+// a public header.
+#include <waLBerlaDefinitions.h>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnonnull"
+#endif
+#include <cmath>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#include "P2ElementwiseFullStokesAnnulusMap_1_0.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+P2ElementwiseFullStokesAnnulusMap_1_0::P2ElementwiseFullStokesAnnulusMap_1_0( const std::shared_ptr< PrimitiveStorage >& storage,
+                                                                              size_t                                     minLevel,
+                                                                              size_t                                     maxLevel,
+                                                                              const P2Function< real_t >&                _mu )
+: Operator( storage, minLevel, maxLevel )
+, mu( _mu ) // initializes the coefficient member mu from the constructor argument
+{}
+
+void P2ElementwiseFullStokesAnnulusMap_1_0::apply( const P2Function< real_t >& src,
+                                                   const P2Function< real_t >& dst,
+                                                   uint_t                      level,
+                                                   DoFType                     flag,
+                                                   UpdateType                  updateType ) const
+{ // Steps: sync halos, zero dst where required, run the kernel per macro-face, add results onto edges/vertices.
+   this->startTiming( "apply" );
+
+   // Make sure that the halos of src and of the coefficient mu are up-to-date
+   this->timingTree_->start( "pre-communication" );
+   if ( this->storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." ); // storages with global cells are not supported by this operator
+   }
+   else
+   {
+      communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( mu, level, communication::syncDirection_t::LOW2HIGH );
+   }
+   this->timingTree_->stop( "pre-communication" );
+
+   if ( updateType == Replace )
+   {
+      // We need to zero the destination array (including halos).
+      // However, we must not zero out anything that is not flagged with the specified BCs.
+      // Therefore, we first zero out everything that is flagged, and then, later,
+      // the halos of the highest-dimensional primitives.
+      dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data in the functions
+         real_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muVertex  = face.getData( mu.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muEdge    = face.getData( mu.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         // Zero out dst halos only, i.e. vertex DoFs on the face boundary and all non-inner edge DoFs
+         //
+         // This is also necessary when using update type == Add.
+         // During additive comm we then skip zeroing the data on the lower-dim primitives.
+         for ( const auto& idx : vertexdof::macroface::Iterator( level ) )
+         {
+            if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) )
+            {
+               auto arrayIdx             = vertexdof::macroface::index( level, idx.x(), idx.y() );
+               _data_dstVertex[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+            }
+         }
+         for ( const auto& idx : edgedof::macroface::Iterator( level ) )
+         {
+            for ( const auto& orientation : edgedof::faceLocalEdgeDoFOrientations )
+            {
+               if ( !edgedof::macroface::isInnerEdgeDoF( level, idx, orientation ) )
+               {
+                  auto arrayIdx           = edgedof::macroface::index( level, idx.x(), idx.y(), orientation );
+                  _data_dstEdge[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+               }
+            }
+         }
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         WALBERLA_CHECK_NOT_NULLPTR( // NOTE(review): checked per macro-face, although the message below says "macro-cell"
+             std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ),
+             "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex();
+         real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex();
+         real_t refVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0];
+         real_t rayVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0];
+         real_t thrVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0];
+         real_t refVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1];
+         real_t rayVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1];
+         real_t thrVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+
+         apply_macro_2D(
+
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_muEdge,
+             _data_muVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+
+      // Push result to lower-dimensional primitives
+      // (additive communication from the faces to their edges and vertices)
+      this->timingTree_->start( "post-communication" );
+      // Note: We could avoid communication here by implementing the apply() also for the respective
+      //       lower dimensional primitives!
+      dst.getVertexDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getVertexDoFFunction().communicateAdditively< Face, Vertex >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getEdgeDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      this->timingTree_->stop( "post-communication" );
+   }
+
+   this->stopTiming( "apply" );
+}
+/// Assembles the operator on `level` into `mat`: mu is synced between primitives (LOW2HIGH), then
+/// toMatrix_macro_2D runs once per macro-face on the face-local index data of `src` and `dst`.
+/// Aborts ("Not implemented.") for storages with global cells; `flag` is ignored (warns if it is not All).
+void P2ElementwiseFullStokesAnnulusMap_1_0::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                                                      const P2Function< idx_t >&                  src,
+                                                      const P2Function< idx_t >&                  dst,
+                                                      uint_t                                      level,
+                                                      DoFType                                     flag ) const
+{
+   this->startTiming( "toMatrix" );
+
+   // We currently ignore the flag provided!
+   if ( flag != All )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      this->timingTree_->start( "pre-communication" );
+      mu.communicate< Face, Cell >( level );
+      mu.communicate< Edge, Cell >( level );
+      mu.communicate< Vertex, Cell >( level );
+      this->timingTree_->stop( "pre-communication" );
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      this->timingTree_->start( "pre-communication" );
+      communication::syncFunctionBetweenPrimitives( mu, level, communication::syncDirection_t::LOW2HIGH );
+      this->timingTree_->stop( "pre-communication" );
+
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+         // get hold of the actual numerical data
+         idx_t*  _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muVertex  = face.getData( mu.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muEdge    = face.getData( mu.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         // Cast the geometry map once per face; the same pointer serves the null check and all accessors.
+         const auto annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+         WALBERLA_CHECK_NOT_NULLPTR(
+             annulusMap, "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         const real_t radRefVertex = annulusMap->radRefVertex();
+         const real_t radRayVertex = annulusMap->radRayVertex();
+         const real_t refVertex_0  = annulusMap->refVertex()[0];
+         const real_t rayVertex_0  = annulusMap->rayVertex()[0];
+         const real_t thrVertex_0  = annulusMap->thrVertex()[0];
+         const real_t refVertex_1  = annulusMap->refVertex()[1];
+         const real_t rayVertex_1  = annulusMap->rayVertex()[1];
+         const real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+         toMatrix_macro_2D(
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_muEdge,
+             _data_muVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             mat,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+   }
+   this->stopTiming( "toMatrix" );
+}
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.hpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..4efd4a85bdd2364216b1f39b35d5e8253cf10b60
--- /dev/null
+++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.hpp
@@ -0,0 +1,159 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+#include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/communication/Syncing.hpp"
+#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
+#include "hyteg/geometry/AnnulusMap.hpp"
+#include "hyteg/operators/Operator.hpp"
+#include "hyteg/p2functionspace/P2Function.hpp"
+#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
+#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+/// Implements the fully coupled viscous operator of the Stokes problem.
+/// The latter is the extension of the Epsilon operator to the case where
+/// the velocity field need not be divergence-free. This is e.g. the case
+/// in the (truncated) anelastic liquid approximation of mantle convection.
+///
+/// The strong representation of the operator is given by:
+///
+///    - div[ μ (grad(u)+grad(u)ᵀ) ] + 2/3 grad[ μ div(u) ]
+///
+/// Note that the factor 2/3 means that for 2D this is the pseudo-3D form
+/// of the operator.
+///
+/// Component trial: 0
+/// Component test:  1
+/// Geometry map:    AnnulusMap
+///
+/// Weak formulation
+///
+///     u: trial function (vectorial space: Lagrange, degree: 2)
+///     v: test function  (vectorial space: Lagrange, degree: 2)
+///     μ: coefficient    (scalar space:    Lagrange, degree: 2)
+///
+///     ∫ μ { ( 2 ε(u) : ε(v) ) - (2/3) [ ( ∇ · u ) · ( ∇ · v ) ] }
+///
+/// where
+///
+///     ε(w) := (1/2) (∇w + (∇w)ᵀ)
+
+class P2ElementwiseFullStokesAnnulusMap_1_0 : public Operator< P2Function< real_t >, P2Function< real_t > >
+{
+ public:
+   P2ElementwiseFullStokesAnnulusMap_1_0( const std::shared_ptr< PrimitiveStorage >& storage,
+                                          size_t                                     minLevel,
+                                          size_t                                     maxLevel,
+                                          const P2Function< real_t >&                _mu ); // presumably initialises `mu`; definition not in view
+   /// Runs apply_macro_2D and afterwards communicates `dst` additively from faces to edges and vertices.
+   void apply( const P2Function< real_t >& src,
+               const P2Function< real_t >& dst,
+               uint_t                      level,
+               DoFType                     flag,
+               UpdateType                  updateType = Replace ) const;
+   /// Runs toMatrix_macro_2D on every macro-face; aborts for storages with global cells and ignores `flag`.
+   void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                  const P2Function< idx_t >&                  src,
+                  const P2Function< idx_t >&                  dst,
+                  uint_t                                      level,
+                  DoFType                                     flag ) const;
+
+ protected:
+ private:
+   /// Kernel type: apply
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///   1032    1248      24      16      4              0                 0              1
+   void apply_macro_2D( real_t* RESTRICT _data_dstEdge, // this and the next five: face-local data pointers
+                        real_t* RESTRICT _data_dstVertex,
+                        real_t* RESTRICT _data_muEdge,
+                        real_t* RESTRICT _data_muVertex,
+                        real_t* RESTRICT _data_srcEdge,
+                        real_t* RESTRICT _data_srcVertex,
+                        real_t           macro_vertex_coord_id_0comp0,
+                        real_t           macro_vertex_coord_id_0comp1,
+                        real_t           macro_vertex_coord_id_1comp0,
+                        real_t           macro_vertex_coord_id_1comp1,
+                        real_t           macro_vertex_coord_id_2comp0,
+                        real_t           macro_vertex_coord_id_2comp1,
+                        int64_t          micro_edges_per_macro_edge,
+                        real_t           micro_edges_per_macro_edge_float,
+                        real_t           radRayVertex, // this and all following: read from the face's AnnulusMap
+                        real_t           radRefVertex,
+                        real_t           rayVertex_0,
+                        real_t           rayVertex_1,
+                        real_t           refVertex_0,
+                        real_t           refVertex_1,
+                        real_t           thrVertex_0,
+                        real_t           thrVertex_1 ) const;
+   /// Kernel type: toMatrix
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    996    1212      24      16      4              0                 0              4
+   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge, // this and the next five: face-local data pointers
+                           idx_t* RESTRICT                      _data_dstVertex,
+                           real_t* RESTRICT                     _data_muEdge,
+                           real_t* RESTRICT                     _data_muVertex,
+                           idx_t* RESTRICT                      _data_srcEdge,
+                           idx_t* RESTRICT                      _data_srcVertex,
+                           real_t                               macro_vertex_coord_id_0comp0, // face.getCoordinates()[0][0]
+                           real_t                               macro_vertex_coord_id_0comp1, // face.getCoordinates()[0][1]
+                           real_t                               macro_vertex_coord_id_1comp0, // face.getCoordinates()[1][0]
+                           real_t                               macro_vertex_coord_id_1comp1, // face.getCoordinates()[1][1]
+                           real_t                               macro_vertex_coord_id_2comp0, // face.getCoordinates()[2][0]
+                           real_t                               macro_vertex_coord_id_2comp1, // face.getCoordinates()[2][1]
+                           std::shared_ptr< SparseMatrixProxy > mat,
+                           int64_t                              micro_edges_per_macro_edge, // levelinfo::num_microedges_per_edge( level )
+                           real_t                               micro_edges_per_macro_edge_float, // same value, cast to real_t
+                           real_t                               radRayVertex, // this and all following: read from the face's AnnulusMap
+                           real_t                               radRefVertex,
+                           real_t                               rayVertex_0,
+                           real_t                               rayVertex_1,
+                           real_t                               refVertex_0,
+                           real_t                               refVertex_1,
+                           real_t                               thrVertex_0,
+                           real_t                               thrVertex_1 ) const;
+   /// Coefficient function μ; its vertex- and edge-DoF face data are handed to the kernels as _data_mu*.
+   P2Function< real_t > mu;
+};
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.cpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ab4555900345a37e9f599b1f274084841fa8c123
--- /dev/null
+++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.cpp
@@ -0,0 +1,399 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug
+// (possibly related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087),
+// causing a warning in an internal standard library header (bits/stl_algobase.h).
+// As a workaround, we disable the warning and include this header indirectly
+// through a public header.
+#include <waLBerlaDefinitions.h>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnonnull"
+#endif
+#include <cmath>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#include "P2ElementwiseFullStokesAnnulusMap_1_1.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+/// Forwards storage and level range to the Operator base and initialises the member `mu` from `_mu`.
+P2ElementwiseFullStokesAnnulusMap_1_1::P2ElementwiseFullStokesAnnulusMap_1_1( const std::shared_ptr< PrimitiveStorage >& storage,
+                                                                              size_t                                     minLevel,
+                                                                              size_t                                     maxLevel,
+                                                                              const P2Function< real_t >&                _mu )
+: Operator( storage, minLevel, maxLevel ), mu( _mu )
+{}
+
+/// Applies the operator to `src` on `level` and stores the result in `dst`. For updateType == Replace the
+/// DoFs of `dst` selected by `flag` are zeroed first; the dst halos on each macro-face are always zeroed
+/// before apply_macro_2D runs, and the face results are finally communicated additively to edges and
+/// vertices. Aborts ("Not implemented.") for storages with global cells.
+void P2ElementwiseFullStokesAnnulusMap_1_1::apply( const P2Function< real_t >& src,
+                                                   const P2Function< real_t >& dst,
+                                                   uint_t                      level,
+                                                   DoFType                     flag,
+                                                   UpdateType                  updateType ) const
+{
+   this->startTiming( "apply" );
+
+   // Make sure that halos are up-to-date
+   this->timingTree_->start( "pre-communication" );
+   if ( this->storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( mu, level, communication::syncDirection_t::LOW2HIGH );
+   }
+   this->timingTree_->stop( "pre-communication" );
+
+   if ( updateType == Replace )
+   {
+      // We need to zero the destination array (including halos).
+      // However, we must not zero out anything that is not flagged with the specified BCs.
+      // Therefore, we first zero out everything that is flagged, and then, later,
+      // the halos of the highest dim primitives.
+      dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+         // get hold of the actual numerical data in the functions
+         real_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muVertex  = face.getData( mu.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muEdge    = face.getData( mu.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         // Zero out dst halos only
+         // This is also necessary when using update type == Add.
+         // During additive comm we then skip zeroing the data on the lower-dim primitives.
+         for ( const auto& idx : vertexdof::macroface::Iterator( level ) )
+         {
+            if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) )
+            {
+               auto arrayIdx             = vertexdof::macroface::index( level, idx.x(), idx.y() );
+               _data_dstVertex[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+            }
+         }
+         for ( const auto& idx : edgedof::macroface::Iterator( level ) )
+         {
+            for ( const auto& orientation : edgedof::faceLocalEdgeDoFOrientations )
+            {
+               if ( !edgedof::macroface::isInnerEdgeDoF( level, idx, orientation ) )
+               {
+                  auto arrayIdx           = edgedof::macroface::index( level, idx.x(), idx.y(), orientation );
+                  _data_dstEdge[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+               }
+            }
+         }
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         // Cast the geometry map once per face; the same pointer serves the null check and all accessors.
+         const auto annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+         WALBERLA_CHECK_NOT_NULLPTR(
+             annulusMap, "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         const real_t radRefVertex = annulusMap->radRefVertex();
+         const real_t radRayVertex = annulusMap->radRayVertex();
+         const real_t refVertex_0  = annulusMap->refVertex()[0];
+         const real_t rayVertex_0  = annulusMap->rayVertex()[0];
+         const real_t thrVertex_0  = annulusMap->thrVertex()[0];
+         const real_t refVertex_1  = annulusMap->refVertex()[1];
+         const real_t rayVertex_1  = annulusMap->rayVertex()[1];
+         const real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+         apply_macro_2D(
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_muEdge,
+             _data_muVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+
+      // Push result to lower-dimensional primitives
+      this->timingTree_->start( "post-communication" );
+      // Note: We could avoid communication here by implementing the apply() also for the respective
+      //       lower dimensional primitives!
+      dst.getVertexDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getVertexDoFFunction().communicateAdditively< Face, Vertex >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getEdgeDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      this->timingTree_->stop( "post-communication" );
+   }
+
+   this->stopTiming( "apply" );
+}
+/// Assembles the operator on `level` into `mat`: mu is synced between primitives (LOW2HIGH), then
+/// toMatrix_macro_2D runs once per macro-face on the face-local index data of `src` and `dst`.
+/// Aborts ("Not implemented.") for storages with global cells; `flag` is ignored (warns if it is not All).
+void P2ElementwiseFullStokesAnnulusMap_1_1::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                                                      const P2Function< idx_t >&                  src,
+                                                      const P2Function< idx_t >&                  dst,
+                                                      uint_t                                      level,
+                                                      DoFType                                     flag ) const
+{
+   this->startTiming( "toMatrix" );
+
+   // We currently ignore the flag provided!
+   if ( flag != All )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      this->timingTree_->start( "pre-communication" );
+      mu.communicate< Face, Cell >( level );
+      mu.communicate< Edge, Cell >( level );
+      mu.communicate< Vertex, Cell >( level );
+      this->timingTree_->stop( "pre-communication" );
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      this->timingTree_->start( "pre-communication" );
+      communication::syncFunctionBetweenPrimitives( mu, level, communication::syncDirection_t::LOW2HIGH );
+      this->timingTree_->stop( "pre-communication" );
+
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+         // get hold of the actual numerical data
+         idx_t*  _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muVertex  = face.getData( mu.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_muEdge    = face.getData( mu.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         // Cast the geometry map once per face; the same pointer serves the null check and all accessors.
+         const auto annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+         WALBERLA_CHECK_NOT_NULLPTR(
+             annulusMap, "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         const real_t radRefVertex = annulusMap->radRefVertex();
+         const real_t radRayVertex = annulusMap->radRayVertex();
+         const real_t refVertex_0  = annulusMap->refVertex()[0];
+         const real_t rayVertex_0  = annulusMap->rayVertex()[0];
+         const real_t thrVertex_0  = annulusMap->thrVertex()[0];
+         const real_t refVertex_1  = annulusMap->refVertex()[1];
+         const real_t rayVertex_1  = annulusMap->rayVertex()[1];
+         const real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+         toMatrix_macro_2D(
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_muEdge,
+             _data_muVertex,
+             _data_srcEdge,
+             _data_srcVertex,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             mat,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+   }
+   this->stopTiming( "toMatrix" );
+}
+/// Recomputes invDiag_ (allocated on first use) for every level in [minLevel_, maxLevel_]: the kernel is run on
+/// each macro-face, the face values are communicated additively to edges and vertices, and the result is
+/// inverted elementwise. Aborts ("Not implemented.") for storages with global cells.
+void P2ElementwiseFullStokesAnnulusMap_1_1::computeInverseDiagonalOperatorValues()
+{
+   this->startTiming( "computeInverseDiagonalOperatorValues" );
+
+   if ( invDiag_ == nullptr )
+   {
+      invDiag_ = std::make_shared< P2Function< real_t > >( "inverse diagonal entries", storage_, minLevel_, maxLevel_ );
+   }
+
+   for ( uint_t level = minLevel_; level <= maxLevel_; level++ )
+   {
+      invDiag_->setToZero( level );
+
+      if ( storage_->hasGlobalCells() )
+      {
+         this->timingTree_->start( "pre-communication" );
+         mu.communicate< Face, Cell >( level );
+         mu.communicate< Edge, Cell >( level );
+         mu.communicate< Vertex, Cell >( level );
+         this->timingTree_->stop( "pre-communication" );
+
+         WALBERLA_ABORT( "Not implemented." );
+      }
+      else
+      {
+         this->timingTree_->start( "pre-communication" );
+         communication::syncFunctionBetweenPrimitives( mu, level, communication::syncDirection_t::LOW2HIGH );
+         this->timingTree_->stop( "pre-communication" );
+
+         for ( auto& it : storage_->getFaces() )
+         {
+            Face& face = *it.second;
+            // get hold of the actual numerical data
+            real_t* _data_invDiag_Vertex =
+                face.getData( invDiag_->getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_invDiag_Edge = face.getData( invDiag_->getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_muVertex     = face.getData( mu.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_muEdge       = face.getData( mu.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+            const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+            const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+            const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+            const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+            const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+            const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+            const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+            const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+            // Cast the geometry map once per face; the same pointer serves the null check and all accessors.
+            const auto annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+            WALBERLA_CHECK_NOT_NULLPTR(
+                annulusMap, "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+            const real_t radRefVertex = annulusMap->radRefVertex();
+            const real_t radRayVertex = annulusMap->radRayVertex();
+            const real_t refVertex_0  = annulusMap->refVertex()[0];
+            const real_t rayVertex_0  = annulusMap->rayVertex()[0];
+            const real_t thrVertex_0  = annulusMap->thrVertex()[0];
+            const real_t refVertex_1  = annulusMap->refVertex()[1];
+            const real_t rayVertex_1  = annulusMap->rayVertex()[1];
+            const real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+            this->timingTree_->start( "kernel" );
+            computeInverseDiagonalOperatorValues_macro_2D(
+                _data_invDiag_Edge,
+                _data_invDiag_Vertex,
+                _data_muEdge,
+                _data_muVertex,
+                macro_vertex_coord_id_0comp0,
+                macro_vertex_coord_id_0comp1,
+                macro_vertex_coord_id_1comp0,
+                macro_vertex_coord_id_1comp1,
+                macro_vertex_coord_id_2comp0,
+                macro_vertex_coord_id_2comp1,
+                micro_edges_per_macro_edge,
+                micro_edges_per_macro_edge_float,
+                radRayVertex,
+                radRefVertex,
+                rayVertex_0,
+                rayVertex_1,
+                refVertex_0,
+                refVertex_1,
+                thrVertex_0,
+                thrVertex_1 );
+            this->timingTree_->stop( "kernel" );
+         }
+
+         // Push result to lower-dimensional primitives
+         this->timingTree_->start( "post-communication" );
+         // Note: We could avoid communication here by implementing the diagonal computation also for the
+         //       respective lower dimensional primitives!
+         invDiag_->getVertexDoFFunction().communicateAdditively< Face, Edge >( level );
+         invDiag_->getVertexDoFFunction().communicateAdditively< Face, Vertex >( level );
+         invDiag_->getEdgeDoFFunction().communicateAdditively< Face, Edge >( level );
+         this->timingTree_->stop( "post-communication" );
+      }
+
+      invDiag_->invertElementwise( level );
+   }
+
+   this->stopTiming( "computeInverseDiagonalOperatorValues" );
+}
+std::shared_ptr< P2Function< real_t > > P2ElementwiseFullStokesAnnulusMap_1_1::getInverseDiagonalValues() const
+{
+   return invDiag_; // hands out the shared_ptr member itself; the pointed-to P2Function is shared with the caller, not copied
+}
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.hpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..3af13f6c0590ecdbd5ab9ae44b93bf938647cfe6
--- /dev/null
+++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.hpp
@@ -0,0 +1,192 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+#include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/communication/Syncing.hpp"
+#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
+#include "hyteg/geometry/AnnulusMap.hpp"
+#include "hyteg/operators/Operator.hpp"
+#include "hyteg/p2functionspace/P2Function.hpp"
+#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
+#include "hyteg/solvers/Smoothables.hpp"
+#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+/// Implements the fully coupled viscous operator of the Stokes problem.
+/// The latter is the extension of the Epsilon operator to the case where
+/// the velocity field need not be divergence-free. This is e.g. the case
+/// in the (truncated) anelastic liquid approximation of mantle convection.
+///
+/// The strong representation of the operator is given by:
+///
+///    - div[ μ (grad(u)+grad(u)ᵀ) ] + 2/3 grad[ μ div(u) ]
+///
+/// Note that the factor 2/3 means that for 2D this is the pseudo-3D form
+/// of the operator.
+///
+/// Component trial: 1
+/// Component test:  1
+/// Geometry map:    AnnulusMap
+///
+/// Weak formulation
+///
+///     u: trial function (vectorial space: Lagrange, degree: 2)
+///     v: test function  (vectorial space: Lagrange, degree: 2)
+///     μ: coefficient    (scalar space:    Lagrange, degree: 2)
+///
+///     ∫ μ { ( 2 ε(u) : ε(v) ) - (2/3) [ ( ∇ · u ) · ( ∇ · v ) ] }
+///
+/// where
+///
+///     ε(w) := (1/2) (∇w + (∇w)ᵀ)
+
+class P2ElementwiseFullStokesAnnulusMap_1_1 : public Operator< P2Function< real_t >, P2Function< real_t > >,
+                                              public OperatorWithInverseDiagonal< P2Function< real_t > >
+{
+ public:
+   P2ElementwiseFullStokesAnnulusMap_1_1( const std::shared_ptr< PrimitiveStorage >& storage,
+                                          size_t                                     minLevel,
+                                          size_t                                     maxLevel,
+                                          const P2Function< real_t >&                _mu ); // coefficient μ of the weak form; presumably kept in member `mu` (constructor body is not part of this header)
+
+   void apply( const P2Function< real_t >& src,
+               const P2Function< real_t >& dst,
+               uint_t                      level,
+               DoFType                     flag,
+               UpdateType                  updateType = Replace ) const;
+
+   void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                  const P2Function< idx_t >&                  src,
+                  const P2Function< idx_t >&                  dst,
+                  uint_t                                      level,
+                  DoFType                                     flag ) const;
+
+   void computeInverseDiagonalOperatorValues(); // writes invDiag_; the implementation finishes with invDiag_->invertElementwise( level )
+
+   std::shared_ptr< P2Function< real_t > > getInverseDiagonalValues() const; // returns the invDiag_ shared pointer
+
+ protected:
+ private:
+   /// Kernel type: apply
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    792    1096      24      16      4              0                 0              1
+   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
+                        real_t* RESTRICT _data_dstVertex,
+                        real_t* RESTRICT _data_muEdge,
+                        real_t* RESTRICT _data_muVertex,
+                        real_t* RESTRICT _data_srcEdge,
+                        real_t* RESTRICT _data_srcVertex,
+                        real_t           macro_vertex_coord_id_0comp0,
+                        real_t           macro_vertex_coord_id_0comp1,
+                        real_t           macro_vertex_coord_id_1comp0,
+                        real_t           macro_vertex_coord_id_1comp1,
+                        real_t           macro_vertex_coord_id_2comp0,
+                        real_t           macro_vertex_coord_id_2comp1,
+                        int64_t          micro_edges_per_macro_edge,
+                        real_t           micro_edges_per_macro_edge_float,
+                        real_t           radRayVertex,
+                        real_t           radRefVertex,
+                        real_t           rayVertex_0,
+                        real_t           rayVertex_1,
+                        real_t           refVertex_0,
+                        real_t           refVertex_1,
+                        real_t           thrVertex_0,
+                        real_t           thrVertex_1 ) const;
+   /// Kernel type: toMatrix
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    756    1060      24      16      4              0                 0              4
+   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                           idx_t* RESTRICT                      _data_dstVertex,
+                           real_t* RESTRICT                     _data_muEdge,
+                           real_t* RESTRICT                     _data_muVertex,
+                           idx_t* RESTRICT                      _data_srcEdge,
+                           idx_t* RESTRICT                      _data_srcVertex,
+                           real_t                               macro_vertex_coord_id_0comp0,
+                           real_t                               macro_vertex_coord_id_0comp1,
+                           real_t                               macro_vertex_coord_id_1comp0,
+                           real_t                               macro_vertex_coord_id_1comp1,
+                           real_t                               macro_vertex_coord_id_2comp0,
+                           real_t                               macro_vertex_coord_id_2comp1,
+                           std::shared_ptr< SparseMatrixProxy > mat,
+                           int64_t                              micro_edges_per_macro_edge,
+                           real_t                               micro_edges_per_macro_edge_float,
+                           real_t                               radRayVertex,
+                           real_t                               radRefVertex,
+                           real_t                               rayVertex_0,
+                           real_t                               rayVertex_1,
+                           real_t                               refVertex_0,
+                           real_t                               refVertex_1,
+                           real_t                               thrVertex_0,
+                           real_t                               thrVertex_1 ) const;
+   /// Kernel type: computeInverseDiagonalOperatorValues
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    522     760      24      16      4              0                 0              1
+   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge,
+                                                       real_t* RESTRICT _data_invDiag_Vertex,
+                                                       real_t* RESTRICT _data_muEdge, // caller passes mu.getEdgeDoFFunction() face data at `level`
+                                                       real_t* RESTRICT _data_muVertex, // caller passes mu.getVertexDoFFunction() face data at `level`
+                                                       real_t           macro_vertex_coord_id_0comp0, // face.getCoordinates()[0][0]
+                                                       real_t           macro_vertex_coord_id_0comp1, // face.getCoordinates()[0][1]
+                                                       real_t           macro_vertex_coord_id_1comp0, // face.getCoordinates()[1][0]
+                                                       real_t           macro_vertex_coord_id_1comp1, // face.getCoordinates()[1][1]
+                                                       real_t           macro_vertex_coord_id_2comp0, // face.getCoordinates()[2][0]
+                                                       real_t           macro_vertex_coord_id_2comp1, // face.getCoordinates()[2][1]
+                                                       int64_t          micro_edges_per_macro_edge, // levelinfo::num_microedges_per_edge( level )
+                                                       real_t           micro_edges_per_macro_edge_float, // same count, cast to real_t by the caller
+                                                       real_t           radRayVertex, // AnnulusMap::radRayVertex() of the face's geometry map
+                                                       real_t           radRefVertex, // AnnulusMap::radRefVertex()
+                                                       real_t           rayVertex_0, // AnnulusMap::rayVertex()[0]
+                                                       real_t           rayVertex_1, // AnnulusMap::rayVertex()[1]
+                                                       real_t           refVertex_0, // AnnulusMap::refVertex()[0]
+                                                       real_t           refVertex_1, // AnnulusMap::refVertex()[1]
+                                                       real_t           thrVertex_0, // AnnulusMap::thrVertex()[0]
+                                                       real_t           thrVertex_1 ) const; // AnnulusMap::thrVertex()[1]
+
+   std::shared_ptr< P2Function< real_t > > invDiag_; // written by computeInverseDiagonalOperatorValues(); handed out by getInverseDiagonalValues()
+   P2Function< real_t >                    mu; // coefficient function; its vertex/edge DoF face data is what the kernels receive as _data_muVertex/_data_muEdge
+};
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..031fb98c7746b9e8502e074431e05771b5886b15
--- /dev/null
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp
@@ -0,0 +1,1015 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseFullStokesAnnulusMap_0_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseFullStokesAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (tmp_qloop_1*tmp_qloop_14 - tmp_qloop_11*tmp_qloop_13);
+       const real_t tmp_qloop_51 = tmp_qloop_15*1.0 / (-tmp_qloop_0*tmp_qloop_14 + tmp_qloop_11*tmp_qloop_12);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_5 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,tmp_qloop_7);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_4);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(tmp_qloop_8,tmp_qloop_9);
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_10)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16));
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_4);
+                   const __m256d tmp_qloop_19 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_10),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_10));
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_7),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_4),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1))),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_7);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(tmp_qloop_23,tmp_qloop_9));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_24),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_26)));
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_27);
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q]));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q]));
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_31);
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q]));
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_30);
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q]));
+                   const __m256d tmp_qloop_36 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_37 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_27);
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_27);
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q]))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q]))));
+                   const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q])));
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_44 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q])));
+                   const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q])));
+                   const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_46);
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_48);
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(tmp_qloop_47,tmp_qloop_49);
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_50)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_51,tmp_qloop_51,tmp_qloop_51,tmp_qloop_51));
+                   const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_52);
+                   const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_48),_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_46),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)));
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_50),_mm256_mul_pd(tmp_qloop_50,tmp_qloop_50)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_55,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(tmp_qloop_51,tmp_qloop_51,tmp_qloop_51,tmp_qloop_51)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_52);
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_55,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(tmp_qloop_51,tmp_qloop_51,tmp_qloop_51,tmp_qloop_51)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_48);
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q])),_mm256_mul_pd(mu_dof_1,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1]))),_mm256_mul_pd(mu_dof_2,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2]))),_mm256_mul_pd(mu_dof_3,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3]))),_mm256_mul_pd(mu_dof_4,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4]))),_mm256_mul_pd(mu_dof_5,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5]))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), 
_mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_58,tmp_qloop_59),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_56,tmp_qloop_59))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_58)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_49,tmp_qloop_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_63 = _mm256_add_pd(tmp_qloop_61,tmp_qloop_62);
+                   const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(2.0,2.0,2.0,2.0)));
+                   const __m256d tmp_qloop_65 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1]))),tmp_qloop_41);
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_68 = _mm256_add_pd(tmp_qloop_66,tmp_qloop_67);
+                   const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(2.0,2.0,2.0,2.0)));
+                   const __m256d tmp_qloop_70 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2]))),tmp_qloop_41);
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_72 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_73 = _mm256_add_pd(tmp_qloop_71,tmp_qloop_72);
+                   const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_72,_mm256_set_pd(2.0,2.0,2.0,2.0)));
+                   const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3]))),tmp_qloop_41);
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_78 = _mm256_add_pd(tmp_qloop_76,tmp_qloop_77);
+                   const __m256d tmp_qloop_79 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_76,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_77,_mm256_set_pd(2.0,2.0,2.0,2.0)));
+                   const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4]))),tmp_qloop_41);
+                   const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5]));
+                   const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5]));
+                   const __m256d tmp_qloop_83 = _mm256_add_pd(tmp_qloop_81,tmp_qloop_82);
+                   const __m256d tmp_qloop_84 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(2.0,2.0,2.0,2.0)));
+                   const __m256d tmp_qloop_85 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5]))),tmp_qloop_41);
+                   const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_87 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_88 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_86,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_87,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_89 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_86,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_87,_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_90 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1])));
+                   const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_92 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_93 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_91,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_92,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_94 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_91,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_92,_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_95 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2])));
+                   const __m256d tmp_qloop_96 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_98 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_96,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_97,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_99 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_96,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_97,_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_100 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3])));
+                   const __m256d tmp_qloop_101 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_102 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_103 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_101,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_102,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_104 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_101,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_102,_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_105 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4])));
+                   const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5]));
+                   const __m256d tmp_qloop_107 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5]));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(2.0,2.0,2.0,2.0)))),_mm256_mul_pd(tmp_qloop_45,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_31)),tmp_qloop_41))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_add_pd(tmp_qloop_29,tmp_qloop_32)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_64),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_65)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,tmp_qloop_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_69),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_70)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,tmp_qloop_68),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_74),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_75)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,tmp_qloop_73),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_79),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_80)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_84),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_85)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,tmp_qloop_83),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_64,tmp_qloop_89),_mm256_mul_pd(tmp_qloop_65,tmp_qloop_90)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_63,tmp_qloop_88),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_89),_mm256_mul_pd(tmp_qloop_70,tmp_qloop_90)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_68,tmp_qloop_88),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_74,tmp_qloop_89),_mm256_mul_pd(tmp_qloop_75,tmp_qloop_90)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_88),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,tmp_qloop_89),_mm256_mul_pd(tmp_qloop_80,tmp_qloop_90)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_78,tmp_qloop_88),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_84,tmp_qloop_89),_mm256_mul_pd(tmp_qloop_85,tmp_qloop_90)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_88),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_70,tmp_qloop_95)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_68,tmp_qloop_93),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_74,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_75,tmp_qloop_95)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_93),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_80,tmp_qloop_95)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_78,tmp_qloop_93),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_84,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_85,tmp_qloop_95)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_93),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,tmp_qloop_75),_mm256_mul_pd(tmp_qloop_74,tmp_qloop_99)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_98),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,tmp_qloop_80),_mm256_mul_pd(tmp_qloop_79,tmp_qloop_99)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_78,tmp_qloop_98),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,tmp_qloop_85),_mm256_mul_pd(tmp_qloop_84,tmp_qloop_99)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_98),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_104,tmp_qloop_79),_mm256_mul_pd(tmp_qloop_105,tmp_qloop_80)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_103,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_104,tmp_qloop_84),_mm256_mul_pd(tmp_qloop_105,tmp_qloop_85)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_103,tmp_qloop_83),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_84,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_106,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_83,_mm256_add_pd(tmp_qloop_106,tmp_qloop_107)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5]))),tmp_qloop_44)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_40));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_1,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_2,src_dof_0),_mm256_mul_pd(q_acc_1_2,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_3,src_dof_0),_mm256_mul_pd(q_acc_1_3,src_dof_1)),_mm256_mul_pd(q_acc_2_3,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_4,src_dof_0),_mm256_mul_pd(q_acc_1_4,src_dof_1)),_mm256_mul_pd(q_acc_2_4,src_dof_2)),_mm256_mul_pd(q_acc_3_4,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_5,src_dof_0),_mm256_mul_pd(q_acc_1_5,src_dof_1)),_mm256_mul_pd(q_acc_2_5,src_dof_2)),_mm256_mul_pd(q_acc_3_5,src_dof_3)),_mm256_mul_pd(q_acc_4_5,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                   const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                   const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                   const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                   const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                   const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                   const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                   const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                   const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                   const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                   const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                   const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                   const real_t tmp_qloop_28 = tmp_qloop_21*tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q];
+                   const real_t tmp_qloop_30 = -tmp_qloop_26;
+                   const real_t tmp_qloop_31 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q];
+                   const real_t tmp_qloop_32 = tmp_qloop_30*tmp_qloop_31;
+                   const real_t tmp_qloop_33 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q];
+                   const real_t tmp_qloop_34 = tmp_qloop_27*tmp_qloop_30;
+                   const real_t tmp_qloop_35 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q];
+                   const real_t tmp_qloop_36 = tmp_qloop_33*0.66666666666666667 + tmp_qloop_35*0.66666666666666667;
+                   const real_t tmp_qloop_37 = tmp_qloop_33*1.0 + tmp_qloop_35*1.0;
+                   const real_t tmp_qloop_38 = tmp_qloop_24*tmp_qloop_27;
+                   const real_t tmp_qloop_39 = tmp_qloop_25*tmp_qloop_27;
+                   const real_t tmp_qloop_40 = (tmp_qloop_38*1.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q] + tmp_qloop_39*1.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q])*(tmp_qloop_38*2.0*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q] + tmp_qloop_39*2.0*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q]);
+                   const real_t tmp_qloop_41 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q] + tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q];
+                   const real_t tmp_qloop_42 = tmp_qloop_38*0.5;
+                   const real_t tmp_qloop_43 = tmp_qloop_39*0.5;
+                   const real_t tmp_qloop_44 = tmp_qloop_28*0.5*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q] + tmp_qloop_34*0.5*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q];
+                   const real_t tmp_qloop_45 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_44*2.0;
+                   const real_t tmp_qloop_46 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                   const real_t tmp_qloop_48 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                   const real_t tmp_qloop_50 = tmp_qloop_47 + tmp_qloop_49;
+                   const real_t tmp_qloop_52 = pow(tmp_qloop_50, -0.50000000000000000)*tmp_qloop_51*1.0;
+                   const real_t tmp_qloop_53 = tmp_qloop_46*tmp_qloop_52;
+                   const real_t tmp_qloop_54 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_48) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_46);
+                   const real_t tmp_qloop_55 = pow(tmp_qloop_50, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_56 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                   const real_t tmp_qloop_57 = tmp_qloop_48*tmp_qloop_52;
+                   const real_t tmp_qloop_58 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                   const real_t tmp_qloop_59 = tmp_qloop_46*tmp_qloop_48;
+                   const real_t tmp_qloop_60 = abs_det_jac_affine_GRAY*(mu_dof_0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*_data_phi_0_0_GRAY[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_53 - tmp_qloop_58*tmp_qloop_59)*(tmp_qloop_12*tmp_qloop_57 + tmp_qloop_56*tmp_qloop_59) - (tmp_qloop_0*tmp_qloop_57 + tmp_qloop_47*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_53 - tmp_qloop_49*tmp_qloop_56))*_data_q_w[q];
+                   const real_t tmp_qloop_61 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1];
+                   const real_t tmp_qloop_62 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1];
+                   const real_t tmp_qloop_63 = tmp_qloop_61 + tmp_qloop_62;
+                   const real_t tmp_qloop_64 = tmp_qloop_61*2.0 + tmp_qloop_62*2.0;
+                   const real_t tmp_qloop_65 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_41;
+                   const real_t tmp_qloop_66 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2];
+                   const real_t tmp_qloop_67 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2];
+                   const real_t tmp_qloop_68 = tmp_qloop_66 + tmp_qloop_67;
+                   const real_t tmp_qloop_69 = tmp_qloop_66*2.0 + tmp_qloop_67*2.0;
+                   const real_t tmp_qloop_70 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_41;
+                   const real_t tmp_qloop_71 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3];
+                   const real_t tmp_qloop_72 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3];
+                   const real_t tmp_qloop_73 = tmp_qloop_71 + tmp_qloop_72;
+                   const real_t tmp_qloop_74 = tmp_qloop_71*2.0 + tmp_qloop_72*2.0;
+                   const real_t tmp_qloop_75 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_41;
+                   const real_t tmp_qloop_76 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4];
+                   const real_t tmp_qloop_77 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4];
+                   const real_t tmp_qloop_78 = tmp_qloop_76 + tmp_qloop_77;
+                   const real_t tmp_qloop_79 = tmp_qloop_76*2.0 + tmp_qloop_77*2.0;
+                   const real_t tmp_qloop_80 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_41;
+                   const real_t tmp_qloop_81 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5];
+                   const real_t tmp_qloop_82 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5];
+                   const real_t tmp_qloop_83 = tmp_qloop_81 + tmp_qloop_82;
+                   const real_t tmp_qloop_84 = tmp_qloop_81*2.0 + tmp_qloop_82*2.0;
+                   const real_t tmp_qloop_85 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_41;
+                   const real_t tmp_qloop_86 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1];
+                   const real_t tmp_qloop_87 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1];
+                   const real_t tmp_qloop_88 = tmp_qloop_86*0.66666666666666667 + tmp_qloop_87*0.66666666666666667;
+                   const real_t tmp_qloop_89 = tmp_qloop_86*1.0 + tmp_qloop_87*1.0;
+                   const real_t tmp_qloop_90 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_44*2.0;
+                   const real_t tmp_qloop_91 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2];
+                   const real_t tmp_qloop_92 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2];
+                   const real_t tmp_qloop_93 = tmp_qloop_91*0.66666666666666667 + tmp_qloop_92*0.66666666666666667;
+                   const real_t tmp_qloop_94 = tmp_qloop_91*1.0 + tmp_qloop_92*1.0;
+                   const real_t tmp_qloop_95 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_44*2.0;
+                   const real_t tmp_qloop_96 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3];
+                   const real_t tmp_qloop_97 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3];
+                   const real_t tmp_qloop_98 = tmp_qloop_96*0.66666666666666667 + tmp_qloop_97*0.66666666666666667;
+                   const real_t tmp_qloop_99 = tmp_qloop_96*1.0 + tmp_qloop_97*1.0;
+                   const real_t tmp_qloop_100 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_44*2.0;
+                   const real_t tmp_qloop_101 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4];
+                   const real_t tmp_qloop_102 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4];
+                   const real_t tmp_qloop_103 = tmp_qloop_101*0.66666666666666667 + tmp_qloop_102*0.66666666666666667;
+                   const real_t tmp_qloop_104 = tmp_qloop_101*1.0 + tmp_qloop_102*1.0;
+                   const real_t tmp_qloop_105 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_44*2.0;
+                   const real_t tmp_qloop_106 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5];
+                   const real_t tmp_qloop_107 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5];
+                   const real_t q_tmp_0_0 = tmp_qloop_60*(-tmp_qloop_36*(tmp_qloop_29 + tmp_qloop_32) + tmp_qloop_37*(tmp_qloop_29*2.0 + tmp_qloop_32*2.0) + tmp_qloop_40 + tmp_qloop_45*(tmp_qloop_24*tmp_qloop_31 + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_41));
+                   const real_t q_tmp_0_1 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_63 + tmp_qloop_37*tmp_qloop_64 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_65);
+                   const real_t q_tmp_0_2 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_68 + tmp_qloop_37*tmp_qloop_69 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_70);
+                   const real_t q_tmp_0_3 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_73 + tmp_qloop_37*tmp_qloop_74 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_75);
+                   const real_t q_tmp_0_4 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_78 + tmp_qloop_37*tmp_qloop_79 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_80);
+                   const real_t q_tmp_0_5 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_83 + tmp_qloop_37*tmp_qloop_84 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_85);
+                   const real_t q_tmp_1_1 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_63*tmp_qloop_88 + tmp_qloop_64*tmp_qloop_89 + tmp_qloop_65*tmp_qloop_90);
+                   const real_t q_tmp_1_2 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_68*tmp_qloop_88 + tmp_qloop_69*tmp_qloop_89 + tmp_qloop_70*tmp_qloop_90);
+                   const real_t q_tmp_1_3 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_73*tmp_qloop_88 + tmp_qloop_74*tmp_qloop_89 + tmp_qloop_75*tmp_qloop_90);
+                   const real_t q_tmp_1_4 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_78*tmp_qloop_88 + tmp_qloop_79*tmp_qloop_89 + tmp_qloop_80*tmp_qloop_90);
+                   const real_t q_tmp_1_5 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_83*tmp_qloop_88 + tmp_qloop_84*tmp_qloop_89 + tmp_qloop_85*tmp_qloop_90);
+                   const real_t q_tmp_2_2 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_68*tmp_qloop_93 + tmp_qloop_69*tmp_qloop_94 + tmp_qloop_70*tmp_qloop_95);
+                   const real_t q_tmp_2_3 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_73*tmp_qloop_93 + tmp_qloop_74*tmp_qloop_94 + tmp_qloop_75*tmp_qloop_95);
+                   const real_t q_tmp_2_4 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_78*tmp_qloop_93 + tmp_qloop_79*tmp_qloop_94 + tmp_qloop_80*tmp_qloop_95);
+                   const real_t q_tmp_2_5 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_83*tmp_qloop_93 + tmp_qloop_84*tmp_qloop_94 + tmp_qloop_85*tmp_qloop_95);
+                   const real_t q_tmp_3_3 = tmp_qloop_60*(tmp_qloop_100*tmp_qloop_75 + tmp_qloop_40 - tmp_qloop_73*tmp_qloop_98 + tmp_qloop_74*tmp_qloop_99);
+                   const real_t q_tmp_3_4 = tmp_qloop_60*(tmp_qloop_100*tmp_qloop_80 + tmp_qloop_40 - tmp_qloop_78*tmp_qloop_98 + tmp_qloop_79*tmp_qloop_99);
+                   const real_t q_tmp_3_5 = tmp_qloop_60*(tmp_qloop_100*tmp_qloop_85 + tmp_qloop_40 - tmp_qloop_83*tmp_qloop_98 + tmp_qloop_84*tmp_qloop_99);
+                   const real_t q_tmp_4_4 = tmp_qloop_60*(-tmp_qloop_103*tmp_qloop_78 + tmp_qloop_104*tmp_qloop_79 + tmp_qloop_105*tmp_qloop_80 + tmp_qloop_40);
+                   const real_t q_tmp_4_5 = tmp_qloop_60*(-tmp_qloop_103*tmp_qloop_83 + tmp_qloop_104*tmp_qloop_84 + tmp_qloop_105*tmp_qloop_85 + tmp_qloop_40);
+                   const real_t q_tmp_5_5 = tmp_qloop_60*(tmp_qloop_40 + tmp_qloop_83*(tmp_qloop_106 + tmp_qloop_107)*-0.66666666666666667 + tmp_qloop_84*(tmp_qloop_106*1.0 + tmp_qloop_107*1.0) + tmp_qloop_85*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_44)*2.0);
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+                const real_t elMatVec_1 = q_acc_0_1*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_0_2*src_dof_0 + q_acc_1_2*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_0_3*src_dof_0 + q_acc_1_3*src_dof_1 + q_acc_2_3*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_0_4*src_dof_0 + q_acc_1_4*src_dof_1 + q_acc_2_4*src_dof_2 + q_acc_3_4*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_0_5*src_dof_0 + q_acc_1_5*src_dof_1 + q_acc_2_5*src_dof_2 + q_acc_3_5*src_dof_3 + q_acc_4_5*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_5 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,tmp_qloop_7);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_4);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(tmp_qloop_8,tmp_qloop_9);
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_10)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16));
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_4);
+                   const __m256d tmp_qloop_19 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_10),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_10));
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_7),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_4),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1))),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_7);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(tmp_qloop_23,tmp_qloop_9));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_24),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_26)));
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_27);
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q]));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q]));
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_31);
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q]));
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_30);
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q]));
+                   const __m256d tmp_qloop_36 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_37 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_27);
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_27);
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q]))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q]))));
+                   const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q])));
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_44 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q])));
+                   const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q])));
+                   const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_46);
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_48);
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(tmp_qloop_47,tmp_qloop_49);
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_50)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_51,tmp_qloop_51,tmp_qloop_51,tmp_qloop_51));
+                   const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_52);
+                   const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_48),_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_46),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)));
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_50),_mm256_mul_pd(tmp_qloop_50,tmp_qloop_50)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_55,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(tmp_qloop_51,tmp_qloop_51,tmp_qloop_51,tmp_qloop_51)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_52);
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_55,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(tmp_qloop_51,tmp_qloop_51,tmp_qloop_51,tmp_qloop_51)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_46,tmp_qloop_48);
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q])),_mm256_mul_pd(mu_dof_1,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1]))),_mm256_mul_pd(mu_dof_2,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2]))),_mm256_mul_pd(mu_dof_3,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3]))),_mm256_mul_pd(mu_dof_4,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4]))),_mm256_mul_pd(mu_dof_5,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5]))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), 
_mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_58,tmp_qloop_59),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_56,tmp_qloop_59))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_58)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_49,tmp_qloop_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_63 = _mm256_add_pd(tmp_qloop_61,tmp_qloop_62);
+                   const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(2.0,2.0,2.0,2.0)));
+                   const __m256d tmp_qloop_65 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1]))),tmp_qloop_41);
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_68 = _mm256_add_pd(tmp_qloop_66,tmp_qloop_67);
+                   const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(2.0,2.0,2.0,2.0)));
+                   const __m256d tmp_qloop_70 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2]))),tmp_qloop_41);
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_72 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_73 = _mm256_add_pd(tmp_qloop_71,tmp_qloop_72);
+                   const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_72,_mm256_set_pd(2.0,2.0,2.0,2.0)));
+                   const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3]))),tmp_qloop_41);
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_78 = _mm256_add_pd(tmp_qloop_76,tmp_qloop_77);
+                   const __m256d tmp_qloop_79 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_76,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_77,_mm256_set_pd(2.0,2.0,2.0,2.0)));
+                   const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4]))),tmp_qloop_41);
+                   const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5]));
+                   const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5]));
+                   const __m256d tmp_qloop_83 = _mm256_add_pd(tmp_qloop_81,tmp_qloop_82);
+                   const __m256d tmp_qloop_84 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(2.0,2.0,2.0,2.0)));
+                   const __m256d tmp_qloop_85 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5]))),tmp_qloop_41);
+                   const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_87 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_88 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_86,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_87,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_89 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_86,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_87,_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_90 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1])));
+                   const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_92 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_93 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_91,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_92,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_94 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_91,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_92,_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_95 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2])));
+                   const __m256d tmp_qloop_96 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_98 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_96,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_97,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_99 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_96,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_97,_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_100 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3])));
+                   const __m256d tmp_qloop_101 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_102 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_103 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_101,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_102,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_104 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_101,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_102,_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_105 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4])));
+                   const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5]));
+                   const __m256d tmp_qloop_107 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5]));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(2.0,2.0,2.0,2.0)))),_mm256_mul_pd(tmp_qloop_45,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_31)),tmp_qloop_41))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_add_pd(tmp_qloop_29,tmp_qloop_32)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_64),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_65)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,tmp_qloop_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_69),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_70)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,tmp_qloop_68),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_74),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_75)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,tmp_qloop_73),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_79),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_80)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_84),_mm256_mul_pd(tmp_qloop_45,tmp_qloop_85)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,tmp_qloop_83),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_64,tmp_qloop_89),_mm256_mul_pd(tmp_qloop_65,tmp_qloop_90)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_63,tmp_qloop_88),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_89),_mm256_mul_pd(tmp_qloop_70,tmp_qloop_90)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_68,tmp_qloop_88),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_74,tmp_qloop_89),_mm256_mul_pd(tmp_qloop_75,tmp_qloop_90)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_88),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,tmp_qloop_89),_mm256_mul_pd(tmp_qloop_80,tmp_qloop_90)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_78,tmp_qloop_88),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_84,tmp_qloop_89),_mm256_mul_pd(tmp_qloop_85,tmp_qloop_90)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_88),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_70,tmp_qloop_95)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_68,tmp_qloop_93),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_74,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_75,tmp_qloop_95)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_93),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_80,tmp_qloop_95)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_78,tmp_qloop_93),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_84,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_85,tmp_qloop_95)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_93),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,tmp_qloop_75),_mm256_mul_pd(tmp_qloop_74,tmp_qloop_99)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_98),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,tmp_qloop_80),_mm256_mul_pd(tmp_qloop_79,tmp_qloop_99)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_78,tmp_qloop_98),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,tmp_qloop_85),_mm256_mul_pd(tmp_qloop_84,tmp_qloop_99)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_98),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_104,tmp_qloop_79),_mm256_mul_pd(tmp_qloop_105,tmp_qloop_80)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_103,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_104,tmp_qloop_84),_mm256_mul_pd(tmp_qloop_105,tmp_qloop_85)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_103,tmp_qloop_83),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_40));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_60,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_84,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_106,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_83,_mm256_add_pd(tmp_qloop_106,tmp_qloop_107)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5]))),tmp_qloop_44)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_40));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_1,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_2,src_dof_0),_mm256_mul_pd(q_acc_1_2,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_3,src_dof_0),_mm256_mul_pd(q_acc_1_3,src_dof_1)),_mm256_mul_pd(q_acc_2_3,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_4,src_dof_0),_mm256_mul_pd(q_acc_1_4,src_dof_1)),_mm256_mul_pd(q_acc_2_4,src_dof_2)),_mm256_mul_pd(q_acc_3_4,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_5,src_dof_0),_mm256_mul_pd(q_acc_1_5,src_dof_1)),_mm256_mul_pd(q_acc_2_5,src_dof_2)),_mm256_mul_pd(q_acc_3_5,src_dof_3)),_mm256_mul_pd(q_acc_4_5,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                   const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                   const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                   const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                   const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                   const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                   const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                   const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                   const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                   const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                   const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                   const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                   const real_t tmp_qloop_28 = tmp_qloop_21*tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q];
+                   const real_t tmp_qloop_30 = -tmp_qloop_26;
+                   const real_t tmp_qloop_31 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q];
+                   const real_t tmp_qloop_32 = tmp_qloop_30*tmp_qloop_31;
+                   const real_t tmp_qloop_33 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q];
+                   const real_t tmp_qloop_34 = tmp_qloop_27*tmp_qloop_30;
+                   const real_t tmp_qloop_35 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q];
+                   const real_t tmp_qloop_36 = tmp_qloop_33*0.66666666666666667 + tmp_qloop_35*0.66666666666666667;
+                   const real_t tmp_qloop_37 = tmp_qloop_33*1.0 + tmp_qloop_35*1.0;
+                   const real_t tmp_qloop_38 = tmp_qloop_24*tmp_qloop_27;
+                   const real_t tmp_qloop_39 = tmp_qloop_25*tmp_qloop_27;
+                   const real_t tmp_qloop_40 = (tmp_qloop_38*1.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q] + tmp_qloop_39*1.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q])*(tmp_qloop_38*2.0*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q] + tmp_qloop_39*2.0*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q]);
+                   const real_t tmp_qloop_41 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q] + tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q];
+                   const real_t tmp_qloop_42 = tmp_qloop_38*0.5;
+                   const real_t tmp_qloop_43 = tmp_qloop_39*0.5;
+                   const real_t tmp_qloop_44 = tmp_qloop_28*0.5*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q] + tmp_qloop_34*0.5*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q];
+                   const real_t tmp_qloop_45 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_44*2.0;
+                   const real_t tmp_qloop_46 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                   const real_t tmp_qloop_48 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                   const real_t tmp_qloop_50 = tmp_qloop_47 + tmp_qloop_49;
+                   const real_t tmp_qloop_52 = pow(tmp_qloop_50, -0.50000000000000000)*tmp_qloop_51*1.0;
+                   const real_t tmp_qloop_53 = tmp_qloop_46*tmp_qloop_52;
+                   const real_t tmp_qloop_54 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_48) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_46);
+                   const real_t tmp_qloop_55 = pow(tmp_qloop_50, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_56 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                   const real_t tmp_qloop_57 = tmp_qloop_48*tmp_qloop_52;
+                   const real_t tmp_qloop_58 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                   const real_t tmp_qloop_59 = tmp_qloop_46*tmp_qloop_48;
+                   const real_t tmp_qloop_60 = abs_det_jac_affine_BLUE*(mu_dof_0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*_data_phi_0_0_BLUE[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_53 - tmp_qloop_58*tmp_qloop_59)*(tmp_qloop_12*tmp_qloop_57 + tmp_qloop_56*tmp_qloop_59) - (tmp_qloop_0*tmp_qloop_57 + tmp_qloop_47*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_53 - tmp_qloop_49*tmp_qloop_56))*_data_q_w[q];
+                   const real_t tmp_qloop_61 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1];
+                   const real_t tmp_qloop_62 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1];
+                   const real_t tmp_qloop_63 = tmp_qloop_61 + tmp_qloop_62;
+                   const real_t tmp_qloop_64 = tmp_qloop_61*2.0 + tmp_qloop_62*2.0;
+                   const real_t tmp_qloop_65 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_41;
+                   const real_t tmp_qloop_66 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2];
+                   const real_t tmp_qloop_67 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2];
+                   const real_t tmp_qloop_68 = tmp_qloop_66 + tmp_qloop_67;
+                   const real_t tmp_qloop_69 = tmp_qloop_66*2.0 + tmp_qloop_67*2.0;
+                   const real_t tmp_qloop_70 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_41;
+                   const real_t tmp_qloop_71 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3];
+                   const real_t tmp_qloop_72 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3];
+                   const real_t tmp_qloop_73 = tmp_qloop_71 + tmp_qloop_72;
+                   const real_t tmp_qloop_74 = tmp_qloop_71*2.0 + tmp_qloop_72*2.0;
+                   const real_t tmp_qloop_75 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_41;
+                   const real_t tmp_qloop_76 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4];
+                   const real_t tmp_qloop_77 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4];
+                   const real_t tmp_qloop_78 = tmp_qloop_76 + tmp_qloop_77;
+                   const real_t tmp_qloop_79 = tmp_qloop_76*2.0 + tmp_qloop_77*2.0;
+                   const real_t tmp_qloop_80 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_41;
+                   const real_t tmp_qloop_81 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5];
+                   const real_t tmp_qloop_82 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5];
+                   const real_t tmp_qloop_83 = tmp_qloop_81 + tmp_qloop_82;
+                   const real_t tmp_qloop_84 = tmp_qloop_81*2.0 + tmp_qloop_82*2.0;
+                   const real_t tmp_qloop_85 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_41;
+                   const real_t tmp_qloop_86 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1];
+                   const real_t tmp_qloop_87 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1];
+                   const real_t tmp_qloop_88 = tmp_qloop_86*0.66666666666666667 + tmp_qloop_87*0.66666666666666667;
+                   const real_t tmp_qloop_89 = tmp_qloop_86*1.0 + tmp_qloop_87*1.0;
+                   const real_t tmp_qloop_90 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_44*2.0;
+                   const real_t tmp_qloop_91 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2];
+                   const real_t tmp_qloop_92 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2];
+                   const real_t tmp_qloop_93 = tmp_qloop_91*0.66666666666666667 + tmp_qloop_92*0.66666666666666667;
+                   const real_t tmp_qloop_94 = tmp_qloop_91*1.0 + tmp_qloop_92*1.0;
+                   const real_t tmp_qloop_95 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_44*2.0;
+                   const real_t tmp_qloop_96 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3];
+                   const real_t tmp_qloop_97 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3];
+                   const real_t tmp_qloop_98 = tmp_qloop_96*0.66666666666666667 + tmp_qloop_97*0.66666666666666667;
+                   const real_t tmp_qloop_99 = tmp_qloop_96*1.0 + tmp_qloop_97*1.0;
+                   const real_t tmp_qloop_100 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_44*2.0;
+                   const real_t tmp_qloop_101 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4];
+                   const real_t tmp_qloop_102 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4];
+                   const real_t tmp_qloop_103 = tmp_qloop_101*0.66666666666666667 + tmp_qloop_102*0.66666666666666667;
+                   const real_t tmp_qloop_104 = tmp_qloop_101*1.0 + tmp_qloop_102*1.0;
+                   const real_t tmp_qloop_105 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_44*2.0;
+                   const real_t tmp_qloop_106 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5];
+                   const real_t tmp_qloop_107 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5];
+                   const real_t q_tmp_0_0 = tmp_qloop_60*(-tmp_qloop_36*(tmp_qloop_29 + tmp_qloop_32) + tmp_qloop_37*(tmp_qloop_29*2.0 + tmp_qloop_32*2.0) + tmp_qloop_40 + tmp_qloop_45*(tmp_qloop_24*tmp_qloop_31 + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_41));
+                   const real_t q_tmp_0_1 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_63 + tmp_qloop_37*tmp_qloop_64 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_65);
+                   const real_t q_tmp_0_2 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_68 + tmp_qloop_37*tmp_qloop_69 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_70);
+                   const real_t q_tmp_0_3 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_73 + tmp_qloop_37*tmp_qloop_74 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_75);
+                   const real_t q_tmp_0_4 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_78 + tmp_qloop_37*tmp_qloop_79 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_80);
+                   const real_t q_tmp_0_5 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_83 + tmp_qloop_37*tmp_qloop_84 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_85);
+                   const real_t q_tmp_1_1 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_63*tmp_qloop_88 + tmp_qloop_64*tmp_qloop_89 + tmp_qloop_65*tmp_qloop_90);
+                   const real_t q_tmp_1_2 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_68*tmp_qloop_88 + tmp_qloop_69*tmp_qloop_89 + tmp_qloop_70*tmp_qloop_90);
+                   const real_t q_tmp_1_3 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_73*tmp_qloop_88 + tmp_qloop_74*tmp_qloop_89 + tmp_qloop_75*tmp_qloop_90);
+                   const real_t q_tmp_1_4 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_78*tmp_qloop_88 + tmp_qloop_79*tmp_qloop_89 + tmp_qloop_80*tmp_qloop_90);
+                   const real_t q_tmp_1_5 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_83*tmp_qloop_88 + tmp_qloop_84*tmp_qloop_89 + tmp_qloop_85*tmp_qloop_90);
+                   const real_t q_tmp_2_2 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_68*tmp_qloop_93 + tmp_qloop_69*tmp_qloop_94 + tmp_qloop_70*tmp_qloop_95);
+                   const real_t q_tmp_2_3 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_73*tmp_qloop_93 + tmp_qloop_74*tmp_qloop_94 + tmp_qloop_75*tmp_qloop_95);
+                   const real_t q_tmp_2_4 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_78*tmp_qloop_93 + tmp_qloop_79*tmp_qloop_94 + tmp_qloop_80*tmp_qloop_95);
+                   const real_t q_tmp_2_5 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_83*tmp_qloop_93 + tmp_qloop_84*tmp_qloop_94 + tmp_qloop_85*tmp_qloop_95);
+                   const real_t q_tmp_3_3 = tmp_qloop_60*(tmp_qloop_100*tmp_qloop_75 + tmp_qloop_40 - tmp_qloop_73*tmp_qloop_98 + tmp_qloop_74*tmp_qloop_99);
+                   const real_t q_tmp_3_4 = tmp_qloop_60*(tmp_qloop_100*tmp_qloop_80 + tmp_qloop_40 - tmp_qloop_78*tmp_qloop_98 + tmp_qloop_79*tmp_qloop_99);
+                   const real_t q_tmp_3_5 = tmp_qloop_60*(tmp_qloop_100*tmp_qloop_85 + tmp_qloop_40 - tmp_qloop_83*tmp_qloop_98 + tmp_qloop_84*tmp_qloop_99);
+                   const real_t q_tmp_4_4 = tmp_qloop_60*(-tmp_qloop_103*tmp_qloop_78 + tmp_qloop_104*tmp_qloop_79 + tmp_qloop_105*tmp_qloop_80 + tmp_qloop_40);
+                   const real_t q_tmp_4_5 = tmp_qloop_60*(-tmp_qloop_103*tmp_qloop_83 + tmp_qloop_104*tmp_qloop_84 + tmp_qloop_105*tmp_qloop_85 + tmp_qloop_40);
+                   const real_t q_tmp_5_5 = tmp_qloop_60*(tmp_qloop_40 + tmp_qloop_83*(tmp_qloop_106 + tmp_qloop_107)*-0.66666666666666667 + tmp_qloop_84*(tmp_qloop_106*1.0 + tmp_qloop_107*1.0) + tmp_qloop_85*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_44)*2.0);
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+                const real_t elMatVec_1 = q_acc_0_1*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_0_2*src_dof_0 + q_acc_1_2*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_0_3*src_dof_0 + q_acc_1_3*src_dof_1 + q_acc_2_3*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_0_4*src_dof_0 + q_acc_1_4*src_dof_1 + q_acc_2_4*src_dof_2 + q_acc_3_4*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_0_5*src_dof_0 + q_acc_1_5*src_dof_1 + q_acc_2_5*src_dof_2 + q_acc_3_5*src_dof_3 + q_acc_4_5*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..49eecf5adffb44d9f835bd0ea11b0814403394bc
--- /dev/null
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
@@ -0,0 +1,691 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseFullStokesAnnulusMap_0_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseFullStokesAnnulusMap_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (tmp_qloop_1*tmp_qloop_14 - tmp_qloop_11*tmp_qloop_13);
+       const real_t tmp_qloop_48 = tmp_qloop_15*1.0 / (-tmp_qloop_0*tmp_qloop_14 + tmp_qloop_11*tmp_qloop_12);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_5 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,tmp_qloop_7);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_4);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(tmp_qloop_8,tmp_qloop_9);
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_10)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16));
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_4);
+                   const __m256d tmp_qloop_19 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_10),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_10));
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_7),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_4),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1))),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_7);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(tmp_qloop_23,tmp_qloop_9));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_24),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_26)));
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_27);
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q]));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q]));
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_31);
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q]));
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_30);
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q]));
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_27);
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_27);
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q]))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q]))));
+                   const __m256d tmp_qloop_39 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q])));
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q])));
+                   const __m256d tmp_qloop_43 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_43);
+                   const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_45,tmp_qloop_45);
+                   const __m256d tmp_qloop_47 = _mm256_add_pd(tmp_qloop_44,tmp_qloop_46);
+                   const __m256d tmp_qloop_49 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_47)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_48,tmp_qloop_48,tmp_qloop_48,tmp_qloop_48));
+                   const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_49);
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_45),_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_43),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)));
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_47),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_47)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_51,_mm256_set_pd(tmp_qloop_48,tmp_qloop_48,tmp_qloop_48,tmp_qloop_48)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_45,tmp_qloop_49);
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_51,_mm256_set_pd(tmp_qloop_48,tmp_qloop_48,tmp_qloop_48,tmp_qloop_48)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_45);
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q])),_mm256_mul_pd(mu_dof_1,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1]))),_mm256_mul_pd(mu_dof_2,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2]))),_mm256_mul_pd(mu_dof_3,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3]))),_mm256_mul_pd(mu_dof_4,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4]))),_mm256_mul_pd(mu_dof_5,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5]))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), 
_mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_50,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_55,tmp_qloop_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_53,tmp_qloop_56))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(tmp_qloop_44,tmp_qloop_55)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_50,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_46,tmp_qloop_53),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_65 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_68 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_72 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_73 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_74 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5]));
+                   const __m256d tmp_qloop_75 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5]));
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5]));
+                   const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5]));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_57,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_29,tmp_qloop_32),_mm256_add_pd(tmp_qloop_33,tmp_qloop_35)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_31)),tmp_qloop_39),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q]))),tmp_qloop_42)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_38));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_57,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_59,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_60,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_58,tmp_qloop_59),_mm256_add_pd(tmp_qloop_60,tmp_qloop_61)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1]))),tmp_qloop_39),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1]))),tmp_qloop_42)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_38));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_57,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_64,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_65,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_62,tmp_qloop_63),_mm256_add_pd(tmp_qloop_64,tmp_qloop_65)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2]))),tmp_qloop_39),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2]))),tmp_qloop_42)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_38));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_57,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_68,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_66,tmp_qloop_67),_mm256_add_pd(tmp_qloop_68,tmp_qloop_69)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3]))),tmp_qloop_39),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3]))),tmp_qloop_42)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_38));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_57,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_71,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_72,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_73,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_70,tmp_qloop_71),_mm256_add_pd(tmp_qloop_72,tmp_qloop_73)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4]))),tmp_qloop_39),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4]))),tmp_qloop_42)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_38));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_57,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_74,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_75,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_76,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_77,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_74,tmp_qloop_75),_mm256_add_pd(tmp_qloop_76,tmp_qloop_77)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5]))),tmp_qloop_39),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5]))),tmp_qloop_42)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_38));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatDiag_0 = q_acc_0_0;
+                const __m256d elMatDiag_1 = q_acc_1_1;
+                const __m256d elMatDiag_2 = q_acc_2_2;
+                const __m256d elMatDiag_3 = q_acc_3_3;
+                const __m256d elMatDiag_4 = q_acc_4_4;
+                const __m256d elMatDiag_5 = q_acc_5_5;
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                   const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                   const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                   const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                   const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                   const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                   const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                   const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                   const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                   const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                   const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                   const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                   const real_t tmp_qloop_28 = tmp_qloop_21*tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q];
+                   const real_t tmp_qloop_30 = -tmp_qloop_26;
+                   const real_t tmp_qloop_31 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q];
+                   const real_t tmp_qloop_32 = tmp_qloop_30*tmp_qloop_31;
+                   const real_t tmp_qloop_33 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q];
+                   const real_t tmp_qloop_34 = tmp_qloop_27*tmp_qloop_30;
+                   const real_t tmp_qloop_35 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q];
+                   const real_t tmp_qloop_36 = tmp_qloop_24*tmp_qloop_27;
+                   const real_t tmp_qloop_37 = tmp_qloop_25*tmp_qloop_27;
+                   const real_t tmp_qloop_38 = (tmp_qloop_36*1.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q] + tmp_qloop_37*1.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q])*(tmp_qloop_36*2.0*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q] + tmp_qloop_37*2.0*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q]);
+                   const real_t tmp_qloop_39 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q] + tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q];
+                   const real_t tmp_qloop_40 = tmp_qloop_36*0.5;
+                   const real_t tmp_qloop_41 = tmp_qloop_37*0.5;
+                   const real_t tmp_qloop_42 = tmp_qloop_28*0.5*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q] + tmp_qloop_34*0.5*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q];
+                   const real_t tmp_qloop_43 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_44 = (tmp_qloop_43*tmp_qloop_43);
+                   const real_t tmp_qloop_45 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_46 = (tmp_qloop_45*tmp_qloop_45);
+                   const real_t tmp_qloop_47 = tmp_qloop_44 + tmp_qloop_46;
+                   const real_t tmp_qloop_49 = pow(tmp_qloop_47, -0.50000000000000000)*tmp_qloop_48*1.0;
+                   const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49;
+                   const real_t tmp_qloop_51 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_45) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_43);
+                   const real_t tmp_qloop_52 = pow(tmp_qloop_47, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_53 = tmp_qloop_52*(radRayVertex + tmp_qloop_48*tmp_qloop_51);
+                   const real_t tmp_qloop_54 = tmp_qloop_45*tmp_qloop_49;
+                   const real_t tmp_qloop_55 = tmp_qloop_52*(radRayVertex + tmp_qloop_48*tmp_qloop_51);
+                   const real_t tmp_qloop_56 = tmp_qloop_43*tmp_qloop_45;
+                   const real_t tmp_qloop_57 = abs_det_jac_affine_GRAY*(mu_dof_0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*_data_phi_0_0_GRAY[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_50 - tmp_qloop_55*tmp_qloop_56)*(tmp_qloop_12*tmp_qloop_54 + tmp_qloop_53*tmp_qloop_56) - (tmp_qloop_0*tmp_qloop_54 + tmp_qloop_44*tmp_qloop_55)*(tmp_qloop_12*tmp_qloop_50 - tmp_qloop_46*tmp_qloop_53))*_data_q_w[q];
+                   const real_t tmp_qloop_58 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1];
+                   const real_t tmp_qloop_59 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1];
+                   const real_t tmp_qloop_60 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1];
+                   const real_t tmp_qloop_61 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1];
+                   const real_t tmp_qloop_62 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2];
+                   const real_t tmp_qloop_63 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2];
+                   const real_t tmp_qloop_64 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2];
+                   const real_t tmp_qloop_65 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2];
+                   const real_t tmp_qloop_66 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3];
+                   const real_t tmp_qloop_67 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3];
+                   const real_t tmp_qloop_68 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3];
+                   const real_t tmp_qloop_69 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3];
+                   const real_t tmp_qloop_70 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4];
+                   const real_t tmp_qloop_71 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4];
+                   const real_t tmp_qloop_72 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4];
+                   const real_t tmp_qloop_73 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4];
+                   const real_t tmp_qloop_74 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5];
+                   const real_t tmp_qloop_75 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5];
+                   const real_t tmp_qloop_76 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5];
+                   const real_t tmp_qloop_77 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5];
+                   const real_t q_tmp_0_0 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_29 + tmp_qloop_32)*(tmp_qloop_33 + tmp_qloop_35)*-0.66666666666666667 + (tmp_qloop_29*2.0 + tmp_qloop_32*2.0)*(tmp_qloop_33*1.0 + tmp_qloop_35*1.0) + (tmp_qloop_24*tmp_qloop_31 + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_42)*2.0);
+                   const real_t q_tmp_1_1 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_58 + tmp_qloop_59)*(tmp_qloop_60 + tmp_qloop_61)*-0.66666666666666667 + (tmp_qloop_58*2.0 + tmp_qloop_59*2.0)*(tmp_qloop_60*1.0 + tmp_qloop_61*1.0) + (tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_42)*2.0);
+                   const real_t q_tmp_2_2 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_62 + tmp_qloop_63)*(tmp_qloop_64 + tmp_qloop_65)*-0.66666666666666667 + (tmp_qloop_62*2.0 + tmp_qloop_63*2.0)*(tmp_qloop_64*1.0 + tmp_qloop_65*1.0) + (tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_42)*2.0);
+                   const real_t q_tmp_3_3 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_66 + tmp_qloop_67)*(tmp_qloop_68 + tmp_qloop_69)*-0.66666666666666667 + (tmp_qloop_66*2.0 + tmp_qloop_67*2.0)*(tmp_qloop_68*1.0 + tmp_qloop_69*1.0) + (tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_42)*2.0);
+                   const real_t q_tmp_4_4 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_70 + tmp_qloop_71)*(tmp_qloop_72 + tmp_qloop_73)*-0.66666666666666667 + (tmp_qloop_70*2.0 + tmp_qloop_71*2.0)*(tmp_qloop_72*1.0 + tmp_qloop_73*1.0) + (tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_42)*2.0);
+                   const real_t q_tmp_5_5 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_74 + tmp_qloop_75)*(tmp_qloop_76 + tmp_qloop_77)*-0.66666666666666667 + (tmp_qloop_74*2.0 + tmp_qloop_75*2.0)*(tmp_qloop_76*1.0 + tmp_qloop_77*1.0) + (tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_42)*2.0);
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatDiag_0 = q_acc_0_0;
+                const real_t elMatDiag_1 = q_acc_1_1;
+                const real_t elMatDiag_2 = q_acc_2_2;
+                const real_t elMatDiag_3 = q_acc_3_3;
+                const real_t elMatDiag_4 = q_acc_4_4;
+                const real_t elMatDiag_5 = q_acc_5_5;
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_5 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,tmp_qloop_7);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_4);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(tmp_qloop_8,tmp_qloop_9);
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_10)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16));
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_4);
+                   const __m256d tmp_qloop_19 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_10),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_10));
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_7),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_4),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1))),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_7);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(tmp_qloop_23,tmp_qloop_9));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_24),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_26)));
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_27);
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q]));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q]));
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_31);
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q]));
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_30);
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q]));
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_27);
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_27);
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q]))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q]))));
+                   const __m256d tmp_qloop_39 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q])));
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q])));
+                   const __m256d tmp_qloop_43 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_43);
+                   const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_45,tmp_qloop_45);
+                   const __m256d tmp_qloop_47 = _mm256_add_pd(tmp_qloop_44,tmp_qloop_46);
+                   const __m256d tmp_qloop_49 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_47)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_48,tmp_qloop_48,tmp_qloop_48,tmp_qloop_48));
+                   const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_49);
+                   const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_45),_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_43),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)));
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_47),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_47)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_51,_mm256_set_pd(tmp_qloop_48,tmp_qloop_48,tmp_qloop_48,tmp_qloop_48)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_45,tmp_qloop_49);
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_51,_mm256_set_pd(tmp_qloop_48,tmp_qloop_48,tmp_qloop_48,tmp_qloop_48)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_45);
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q])),_mm256_mul_pd(mu_dof_1,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1]))),_mm256_mul_pd(mu_dof_2,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2]))),_mm256_mul_pd(mu_dof_3,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3]))),_mm256_mul_pd(mu_dof_4,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4]))),_mm256_mul_pd(mu_dof_5,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5]))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), 
_mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_50,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_55,tmp_qloop_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_53,tmp_qloop_56))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(tmp_qloop_44,tmp_qloop_55)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_50,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_46,tmp_qloop_53),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_65 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_68 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_72 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_73 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_74 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5]));
+                   const __m256d tmp_qloop_75 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5]));
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5]));
+                   const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5]));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_57,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_29,tmp_qloop_32),_mm256_add_pd(tmp_qloop_33,tmp_qloop_35)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_31)),tmp_qloop_39),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q]))),tmp_qloop_42)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_38));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_57,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_59,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_60,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_58,tmp_qloop_59),_mm256_add_pd(tmp_qloop_60,tmp_qloop_61)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1]))),tmp_qloop_39),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1]))),tmp_qloop_42)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_38));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_57,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_64,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_65,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_62,tmp_qloop_63),_mm256_add_pd(tmp_qloop_64,tmp_qloop_65)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2]))),tmp_qloop_39),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2]))),tmp_qloop_42)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_38));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_57,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_68,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_66,tmp_qloop_67),_mm256_add_pd(tmp_qloop_68,tmp_qloop_69)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3]))),tmp_qloop_39),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3]))),tmp_qloop_42)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_38));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_57,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_71,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_72,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_73,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_70,tmp_qloop_71),_mm256_add_pd(tmp_qloop_72,tmp_qloop_73)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4]))),tmp_qloop_39),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4]))),tmp_qloop_42)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_38));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_57,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_74,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_75,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_76,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_77,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_74,tmp_qloop_75),_mm256_add_pd(tmp_qloop_76,tmp_qloop_77)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5]))),tmp_qloop_39),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5]))),tmp_qloop_42)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_38));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatDiag_0 = q_acc_0_0;
+                const __m256d elMatDiag_1 = q_acc_1_1;
+                const __m256d elMatDiag_2 = q_acc_2_2;
+                const __m256d elMatDiag_3 = q_acc_3_3;
+                const __m256d elMatDiag_4 = q_acc_4_4;
+                const __m256d elMatDiag_5 = q_acc_5_5;
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                   const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                   const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                   const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                   const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                   const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                   const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                   const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                   const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                   const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                   const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                   const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                   const real_t tmp_qloop_28 = tmp_qloop_21*tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q];
+                   const real_t tmp_qloop_30 = -tmp_qloop_26;
+                   const real_t tmp_qloop_31 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q];
+                   const real_t tmp_qloop_32 = tmp_qloop_30*tmp_qloop_31;
+                   const real_t tmp_qloop_33 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q];
+                   const real_t tmp_qloop_34 = tmp_qloop_27*tmp_qloop_30;
+                   const real_t tmp_qloop_35 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q];
+                   const real_t tmp_qloop_36 = tmp_qloop_24*tmp_qloop_27;
+                   const real_t tmp_qloop_37 = tmp_qloop_25*tmp_qloop_27;
+                   const real_t tmp_qloop_38 = (tmp_qloop_36*1.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q] + tmp_qloop_37*1.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q])*(tmp_qloop_36*2.0*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q] + tmp_qloop_37*2.0*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q]);
+                   const real_t tmp_qloop_39 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q] + tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q];
+                   const real_t tmp_qloop_40 = tmp_qloop_36*0.5;
+                   const real_t tmp_qloop_41 = tmp_qloop_37*0.5;
+                   const real_t tmp_qloop_42 = tmp_qloop_28*0.5*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q] + tmp_qloop_34*0.5*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q];
+                   const real_t tmp_qloop_43 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_44 = (tmp_qloop_43*tmp_qloop_43);
+                   const real_t tmp_qloop_45 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_46 = (tmp_qloop_45*tmp_qloop_45);
+                   const real_t tmp_qloop_47 = tmp_qloop_44 + tmp_qloop_46;
+                   const real_t tmp_qloop_49 = pow(tmp_qloop_47, -0.50000000000000000)*tmp_qloop_48*1.0;
+                   const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49;
+                   const real_t tmp_qloop_51 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_45) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_43);
+                   const real_t tmp_qloop_52 = pow(tmp_qloop_47, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_53 = tmp_qloop_52*(radRayVertex + tmp_qloop_48*tmp_qloop_51);
+                   const real_t tmp_qloop_54 = tmp_qloop_45*tmp_qloop_49;
+                   const real_t tmp_qloop_55 = tmp_qloop_52*(radRayVertex + tmp_qloop_48*tmp_qloop_51);
+                   const real_t tmp_qloop_56 = tmp_qloop_43*tmp_qloop_45;
+                   const real_t tmp_qloop_57 = abs_det_jac_affine_BLUE*(mu_dof_0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*_data_phi_0_0_BLUE[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_50 - tmp_qloop_55*tmp_qloop_56)*(tmp_qloop_12*tmp_qloop_54 + tmp_qloop_53*tmp_qloop_56) - (tmp_qloop_0*tmp_qloop_54 + tmp_qloop_44*tmp_qloop_55)*(tmp_qloop_12*tmp_qloop_50 - tmp_qloop_46*tmp_qloop_53))*_data_q_w[q];
+                   const real_t tmp_qloop_58 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1];
+                   const real_t tmp_qloop_59 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1];
+                   const real_t tmp_qloop_60 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1];
+                   const real_t tmp_qloop_61 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1];
+                   const real_t tmp_qloop_62 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2];
+                   const real_t tmp_qloop_63 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2];
+                   const real_t tmp_qloop_64 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2];
+                   const real_t tmp_qloop_65 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2];
+                   const real_t tmp_qloop_66 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3];
+                   const real_t tmp_qloop_67 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3];
+                   const real_t tmp_qloop_68 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3];
+                   const real_t tmp_qloop_69 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3];
+                   const real_t tmp_qloop_70 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4];
+                   const real_t tmp_qloop_71 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4];
+                   const real_t tmp_qloop_72 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4];
+                   const real_t tmp_qloop_73 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4];
+                   const real_t tmp_qloop_74 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5];
+                   const real_t tmp_qloop_75 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5];
+                   const real_t tmp_qloop_76 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5];
+                   const real_t tmp_qloop_77 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5];
+                   const real_t q_tmp_0_0 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_29 + tmp_qloop_32)*(tmp_qloop_33 + tmp_qloop_35)*-0.66666666666666667 + (tmp_qloop_29*2.0 + tmp_qloop_32*2.0)*(tmp_qloop_33*1.0 + tmp_qloop_35*1.0) + (tmp_qloop_24*tmp_qloop_31 + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_42)*2.0);
+                   const real_t q_tmp_1_1 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_58 + tmp_qloop_59)*(tmp_qloop_60 + tmp_qloop_61)*-0.66666666666666667 + (tmp_qloop_58*2.0 + tmp_qloop_59*2.0)*(tmp_qloop_60*1.0 + tmp_qloop_61*1.0) + (tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_42)*2.0);
+                   const real_t q_tmp_2_2 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_62 + tmp_qloop_63)*(tmp_qloop_64 + tmp_qloop_65)*-0.66666666666666667 + (tmp_qloop_62*2.0 + tmp_qloop_63*2.0)*(tmp_qloop_64*1.0 + tmp_qloop_65*1.0) + (tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_42)*2.0);
+                   const real_t q_tmp_3_3 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_66 + tmp_qloop_67)*(tmp_qloop_68 + tmp_qloop_69)*-0.66666666666666667 + (tmp_qloop_66*2.0 + tmp_qloop_67*2.0)*(tmp_qloop_68*1.0 + tmp_qloop_69*1.0) + (tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_42)*2.0);
+                   const real_t q_tmp_4_4 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_70 + tmp_qloop_71)*(tmp_qloop_72 + tmp_qloop_73)*-0.66666666666666667 + (tmp_qloop_70*2.0 + tmp_qloop_71*2.0)*(tmp_qloop_72*1.0 + tmp_qloop_73*1.0) + (tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_42)*2.0);
+                   const real_t q_tmp_5_5 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_74 + tmp_qloop_75)*(tmp_qloop_76 + tmp_qloop_77)*-0.66666666666666667 + (tmp_qloop_74*2.0 + tmp_qloop_75*2.0)*(tmp_qloop_76*1.0 + tmp_qloop_77*1.0) + (tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_42)*2.0);
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatDiag_0 = q_acc_0_0;
+                const real_t elMatDiag_1 = q_acc_1_1;
+                const real_t elMatDiag_2 = q_acc_2_2;
+                const real_t elMatDiag_3 = q_acc_3_3;
+                const real_t elMatDiag_4 = q_acc_4_4;
+                const real_t elMatDiag_5 = q_acc_5_5;
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..eff38623dd271f6f3d9944e8d4cea830fa15e5dc
--- /dev/null
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp
@@ -0,0 +1,1213 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseFullStokesAnnulusMap_0_1.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseFullStokesAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (-tmp_qloop_1*tmp_qloop_11 + tmp_qloop_13*tmp_qloop_14);
+       const real_t tmp_qloop_53 = tmp_qloop_15*1.0 / (tmp_qloop_0*tmp_qloop_11 - tmp_qloop_12*tmp_qloop_14);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_4);
+                   const __m256d tmp_qloop_8 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(tmp_qloop_5,tmp_qloop_9);
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_10)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16));
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_4);
+                   const __m256d tmp_qloop_19 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_10),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_10));
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_4),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13))),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_21 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_21,tmp_qloop_9));
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_8);
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_5),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_4),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_4),tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_24),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_26)));
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_27);
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q]));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_27);
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q]));
+                   const __m256d tmp_qloop_32 = _mm256_add_pd(tmp_qloop_29,tmp_qloop_31);
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_27);
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q]));
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,tmp_qloop_27),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q]));
+                   const __m256d tmp_qloop_37 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_38 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q])));
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_38,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_40 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q])));
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q])),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q])));
+                   const __m256d tmp_qloop_43 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q])));
+                   const __m256d tmp_qloop_47 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q])));
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_48);
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_50);
+                   const __m256d tmp_qloop_52 = _mm256_add_pd(tmp_qloop_49,tmp_qloop_51);
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_52)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53));
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_54);
+                   const __m256d tmp_qloop_56 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_50),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_48),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)));
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_52),_mm256_mul_pd(tmp_qloop_52,tmp_qloop_52)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_57,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_54);
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_57,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_50);
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q])),_mm256_mul_pd(mu_dof_1,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1]))),_mm256_mul_pd(mu_dof_2,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2]))),_mm256_mul_pd(mu_dof_3,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3]))),_mm256_mul_pd(mu_dof_4,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4]))),_mm256_mul_pd(mu_dof_5,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5]))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), 
_mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_59,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_49,tmp_qloop_60)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_51,tmp_qloop_58),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_60,tmp_qloop_61),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_59,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(tmp_qloop_58,tmp_qloop_61))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_65 = _mm256_add_pd(tmp_qloop_63,tmp_qloop_64);
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_64,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_68 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_70 = _mm256_add_pd(tmp_qloop_68,tmp_qloop_69);
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_68,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_72 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_73 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_74 = _mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_75 = _mm256_add_pd(tmp_qloop_73,tmp_qloop_74);
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_74,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_77 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_80 = _mm256_add_pd(tmp_qloop_78,tmp_qloop_79);
+                   const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_78,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_82 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_83 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5]));
+                   const __m256d tmp_qloop_84 = _mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5]));
+                   const __m256d tmp_qloop_85 = _mm256_add_pd(tmp_qloop_83,tmp_qloop_84);
+                   const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_84,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_90 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_88,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_89,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_38,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_88,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_89,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1])));
+                   const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_94 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_95 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_94,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_96 = _mm256_mul_pd(tmp_qloop_38,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_94,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_97 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2])));
+                   const __m256d tmp_qloop_98 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_99 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_100 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_98,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_99,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_101 = _mm256_mul_pd(tmp_qloop_38,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_98,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_99,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_102 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3])));
+                   const __m256d tmp_qloop_103 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_105 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_104,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_38,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_104,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_107 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4])));
+                   const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5]));
+                   const __m256d tmp_qloop_109 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5]));
+                   const __m256d tmp_qloop_110 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_108,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_111 = _mm256_mul_pd(tmp_qloop_38,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_108,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_112 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5])));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,tmp_qloop_47),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_32,tmp_qloop_37),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_39),tmp_qloop_41));
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_47,tmp_qloop_67),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_65),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_39),tmp_qloop_66));
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_47,tmp_qloop_72),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_70),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_39),tmp_qloop_71));
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_47,tmp_qloop_77),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_75),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_39),tmp_qloop_76));
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_47,tmp_qloop_82),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_80),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_39),tmp_qloop_81));
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_47,tmp_qloop_87),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_39),tmp_qloop_86));
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_32,tmp_qloop_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_41),tmp_qloop_91));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_66),tmp_qloop_91));
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_72,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_70,tmp_qloop_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_71),tmp_qloop_91));
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_75,tmp_qloop_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_76),tmp_qloop_91));
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_82,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_81),tmp_qloop_91));
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,tmp_qloop_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_86),tmp_qloop_91));
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,tmp_qloop_97),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_32,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_41),tmp_qloop_96));
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_97),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_66),tmp_qloop_96));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_72,tmp_qloop_97),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_70,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_71),tmp_qloop_96));
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_97),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_75,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_76),tmp_qloop_96));
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_82,tmp_qloop_97),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_81),tmp_qloop_96));
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_97),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_86),tmp_qloop_96));
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_43),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_100,tmp_qloop_32),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_101),tmp_qloop_41));
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_67),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_100,tmp_qloop_65),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_101),tmp_qloop_66));
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_72),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_100,tmp_qloop_70),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_101),tmp_qloop_71));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_77),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_100,tmp_qloop_75),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_101),tmp_qloop_76));
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_82),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_100,tmp_qloop_80),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_101),tmp_qloop_81));
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_87),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_100,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_101),tmp_qloop_86));
+                   const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_43),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_105,tmp_qloop_32),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_106),tmp_qloop_41));
+                   const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_67),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_105,tmp_qloop_65),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_106),tmp_qloop_66));
+                   const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_72),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_105,tmp_qloop_70),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_106),tmp_qloop_71));
+                   const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_77),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_105,tmp_qloop_75),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_106),tmp_qloop_76));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_82),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_105,tmp_qloop_80),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_106),tmp_qloop_81));
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_87),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_105,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_106),tmp_qloop_86));
+                   const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_43),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_110,tmp_qloop_32),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111),tmp_qloop_41));
+                   const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_67),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_110,tmp_qloop_65),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111),tmp_qloop_66));
+                   const __m256d q_tmp_5_2 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_72),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_110,tmp_qloop_70),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111),tmp_qloop_71));
+                   const __m256d q_tmp_5_3 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_77),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_110,tmp_qloop_75),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111),tmp_qloop_76));
+                   const __m256d q_tmp_5_4 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_82),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_110,tmp_qloop_80),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111),tmp_qloop_81));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_87),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_110,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111),tmp_qloop_86));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0);
+                   q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1);
+                   q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2);
+                   q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0);
+                   q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1);
+                   q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2);
+                   q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3);
+                   q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_0 = 0.0;
+                real_t q_acc_4_1 = 0.0;
+                real_t q_acc_4_2 = 0.0;
+                real_t q_acc_4_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_0 = 0.0;
+                real_t q_acc_5_1 = 0.0;
+                real_t q_acc_5_2 = 0.0;
+                real_t q_acc_5_3 = 0.0;
+                real_t q_acc_5_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_2 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_qloop_3 = p_affine_0_0 - p_affine_2_0;
+                const real_t tmp_qloop_6 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_7 = p_affine_0_1 - p_affine_2_1;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_4 = p_affine_0_0 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_5 = (tmp_qloop_4*tmp_qloop_4);
+                   const real_t tmp_qloop_8 = p_affine_0_1 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                   const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                   const real_t tmp_qloop_10 = tmp_qloop_5 + tmp_qloop_9;
+                   const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                   const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                   const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_4) - tmp_qloop_13*(-rayVertex_1 + tmp_qloop_8));
+                   const real_t tmp_qloop_21 = tmp_qloop_19*tmp_qloop_20*1.0;
+                   const real_t tmp_qloop_22 = tmp_qloop_1*tmp_qloop_18 + tmp_qloop_21*tmp_qloop_9;
+                   const real_t tmp_qloop_23 = tmp_qloop_17*tmp_qloop_8;
+                   const real_t tmp_qloop_24 = -tmp_qloop_13*tmp_qloop_23 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_5*1.0;
+                   const real_t tmp_qloop_25 = tmp_qloop_13*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_26 = tmp_qloop_1*tmp_qloop_23 - tmp_qloop_21*tmp_qloop_4*tmp_qloop_8;
+                   const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_22*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                   const real_t tmp_qloop_28 = tmp_qloop_22*tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q];
+                   const real_t tmp_qloop_30 = tmp_qloop_25*tmp_qloop_27;
+                   const real_t tmp_qloop_31 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q];
+                   const real_t tmp_qloop_32 = tmp_qloop_29 + tmp_qloop_31;
+                   const real_t tmp_qloop_33 = tmp_qloop_24*tmp_qloop_27;
+                   const real_t tmp_qloop_34 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q];
+                   const real_t tmp_qloop_35 = -tmp_qloop_26*tmp_qloop_27;
+                   const real_t tmp_qloop_36 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q];
+                   const real_t tmp_qloop_37 = tmp_qloop_34*0.66666666666666667 + tmp_qloop_36*0.66666666666666667;
+                   const real_t tmp_qloop_38 = tmp_qloop_33*2.0*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q] + tmp_qloop_35*2.0*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q];
+                   const real_t tmp_qloop_39 = tmp_qloop_38*(tmp_qloop_34*1.0 + tmp_qloop_36*1.0);
+                   const real_t tmp_qloop_40 = tmp_qloop_28*1.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q] + tmp_qloop_30*1.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q];
+                   const real_t tmp_qloop_41 = tmp_qloop_40*(tmp_qloop_29*2.0 + tmp_qloop_31*2.0);
+                   const real_t tmp_qloop_42 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q] + tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q];
+                   const real_t tmp_qloop_43 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_42;
+                   const real_t tmp_qloop_44 = tmp_qloop_28*0.5;
+                   const real_t tmp_qloop_45 = tmp_qloop_30*0.5;
+                   const real_t tmp_qloop_46 = tmp_qloop_33*0.5*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q] + tmp_qloop_35*0.5*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q];
+                   const real_t tmp_qloop_47 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_46*2.0;
+                   const real_t tmp_qloop_48 = -p_affine_0_0 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                   const real_t tmp_qloop_50 = -p_affine_0_1 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                   const real_t tmp_qloop_51 = (tmp_qloop_50*tmp_qloop_50);
+                   const real_t tmp_qloop_52 = tmp_qloop_49 + tmp_qloop_51;
+                   const real_t tmp_qloop_54 = pow(tmp_qloop_52, -0.50000000000000000)*tmp_qloop_53*1.0;
+                   const real_t tmp_qloop_55 = tmp_qloop_48*tmp_qloop_54;
+                   const real_t tmp_qloop_56 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_48) + tmp_qloop_12*(rayVertex_1 + tmp_qloop_50);
+                   const real_t tmp_qloop_57 = pow(tmp_qloop_52, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_58 = tmp_qloop_57*(radRayVertex + tmp_qloop_53*tmp_qloop_56);
+                   const real_t tmp_qloop_59 = tmp_qloop_50*tmp_qloop_54;
+                   const real_t tmp_qloop_60 = tmp_qloop_57*(radRayVertex + tmp_qloop_53*tmp_qloop_56);
+                   const real_t tmp_qloop_61 = tmp_qloop_48*tmp_qloop_50;
+                   const real_t tmp_qloop_62 = abs_det_jac_affine_GRAY*(mu_dof_0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*_data_phi_0_0_GRAY[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_55 - tmp_qloop_51*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_59 + tmp_qloop_49*tmp_qloop_60) - (tmp_qloop_0*tmp_qloop_59 + tmp_qloop_58*tmp_qloop_61)*(tmp_qloop_12*tmp_qloop_55 - tmp_qloop_60*tmp_qloop_61))*_data_q_w[q];
+                   const real_t tmp_qloop_63 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1];
+                   const real_t tmp_qloop_64 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1];
+                   const real_t tmp_qloop_65 = tmp_qloop_63 + tmp_qloop_64;
+                   const real_t tmp_qloop_66 = tmp_qloop_40*(tmp_qloop_63*2.0 + tmp_qloop_64*2.0);
+                   const real_t tmp_qloop_67 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_42;
+                   const real_t tmp_qloop_68 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2];
+                   const real_t tmp_qloop_69 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2];
+                   const real_t tmp_qloop_70 = tmp_qloop_68 + tmp_qloop_69;
+                   const real_t tmp_qloop_71 = tmp_qloop_40*(tmp_qloop_68*2.0 + tmp_qloop_69*2.0);
+                   const real_t tmp_qloop_72 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_42;
+                   const real_t tmp_qloop_73 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3];
+                   const real_t tmp_qloop_74 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3];
+                   const real_t tmp_qloop_75 = tmp_qloop_73 + tmp_qloop_74;
+                   const real_t tmp_qloop_76 = tmp_qloop_40*(tmp_qloop_73*2.0 + tmp_qloop_74*2.0);
+                   const real_t tmp_qloop_77 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_42;
+                   const real_t tmp_qloop_78 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4];
+                   const real_t tmp_qloop_79 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4];
+                   const real_t tmp_qloop_80 = tmp_qloop_78 + tmp_qloop_79;
+                   const real_t tmp_qloop_81 = tmp_qloop_40*(tmp_qloop_78*2.0 + tmp_qloop_79*2.0);
+                   const real_t tmp_qloop_82 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_42;
+                   const real_t tmp_qloop_83 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5];
+                   const real_t tmp_qloop_84 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5];
+                   const real_t tmp_qloop_85 = tmp_qloop_83 + tmp_qloop_84;
+                   const real_t tmp_qloop_86 = tmp_qloop_40*(tmp_qloop_83*2.0 + tmp_qloop_84*2.0);
+                   const real_t tmp_qloop_87 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_42;
+                   const real_t tmp_qloop_88 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1];
+                   const real_t tmp_qloop_89 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1];
+                   const real_t tmp_qloop_90 = tmp_qloop_88*0.66666666666666667 + tmp_qloop_89*0.66666666666666667;
+                   const real_t tmp_qloop_91 = tmp_qloop_38*(tmp_qloop_88*1.0 + tmp_qloop_89*1.0);
+                   const real_t tmp_qloop_92 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_46*2.0;
+                   const real_t tmp_qloop_93 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2];
+                   const real_t tmp_qloop_94 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2];
+                   const real_t tmp_qloop_95 = tmp_qloop_93*0.66666666666666667 + tmp_qloop_94*0.66666666666666667;
+                   const real_t tmp_qloop_96 = tmp_qloop_38*(tmp_qloop_93*1.0 + tmp_qloop_94*1.0);
+                   const real_t tmp_qloop_97 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_46*2.0;
+                   const real_t tmp_qloop_98 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3];
+                   const real_t tmp_qloop_99 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3];
+                   const real_t tmp_qloop_100 = tmp_qloop_98*0.66666666666666667 + tmp_qloop_99*0.66666666666666667;
+                   const real_t tmp_qloop_101 = tmp_qloop_38*(tmp_qloop_98*1.0 + tmp_qloop_99*1.0);
+                   const real_t tmp_qloop_102 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_46*2.0;
+                   const real_t tmp_qloop_103 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4];
+                   const real_t tmp_qloop_104 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4];
+                   const real_t tmp_qloop_105 = tmp_qloop_103*0.66666666666666667 + tmp_qloop_104*0.66666666666666667;
+                   const real_t tmp_qloop_106 = tmp_qloop_38*(tmp_qloop_103*1.0 + tmp_qloop_104*1.0);
+                   const real_t tmp_qloop_107 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_46*2.0;
+                   const real_t tmp_qloop_108 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5];
+                   const real_t tmp_qloop_109 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5];
+                   const real_t tmp_qloop_110 = tmp_qloop_108*0.66666666666666667 + tmp_qloop_109*0.66666666666666667;
+                   const real_t tmp_qloop_111 = tmp_qloop_38*(tmp_qloop_108*1.0 + tmp_qloop_109*1.0);
+                   const real_t tmp_qloop_112 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_46*2.0;
+                   const real_t q_tmp_0_0 = tmp_qloop_62*(-tmp_qloop_32*tmp_qloop_37 + tmp_qloop_39 + tmp_qloop_41 + tmp_qloop_43*tmp_qloop_47);
+                   const real_t q_tmp_0_1 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_65 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_67 + tmp_qloop_66);
+                   const real_t q_tmp_0_2 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_70 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_72 + tmp_qloop_71);
+                   const real_t q_tmp_0_3 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_75 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_0_4 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_80 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_82 + tmp_qloop_81);
+                   const real_t q_tmp_0_5 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_85 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_87 + tmp_qloop_86);
+                   const real_t q_tmp_1_0 = tmp_qloop_62*(-tmp_qloop_32*tmp_qloop_90 + tmp_qloop_41 + tmp_qloop_43*tmp_qloop_92 + tmp_qloop_91);
+                   const real_t q_tmp_1_1 = tmp_qloop_62*(-tmp_qloop_65*tmp_qloop_90 + tmp_qloop_66 + tmp_qloop_67*tmp_qloop_92 + tmp_qloop_91);
+                   const real_t q_tmp_1_2 = tmp_qloop_62*(-tmp_qloop_70*tmp_qloop_90 + tmp_qloop_71 + tmp_qloop_72*tmp_qloop_92 + tmp_qloop_91);
+                   const real_t q_tmp_1_3 = tmp_qloop_62*(-tmp_qloop_75*tmp_qloop_90 + tmp_qloop_76 + tmp_qloop_77*tmp_qloop_92 + tmp_qloop_91);
+                   const real_t q_tmp_1_4 = tmp_qloop_62*(-tmp_qloop_80*tmp_qloop_90 + tmp_qloop_81 + tmp_qloop_82*tmp_qloop_92 + tmp_qloop_91);
+                   const real_t q_tmp_1_5 = tmp_qloop_62*(-tmp_qloop_85*tmp_qloop_90 + tmp_qloop_86 + tmp_qloop_87*tmp_qloop_92 + tmp_qloop_91);
+                   const real_t q_tmp_2_0 = tmp_qloop_62*(-tmp_qloop_32*tmp_qloop_95 + tmp_qloop_41 + tmp_qloop_43*tmp_qloop_97 + tmp_qloop_96);
+                   const real_t q_tmp_2_1 = tmp_qloop_62*(-tmp_qloop_65*tmp_qloop_95 + tmp_qloop_66 + tmp_qloop_67*tmp_qloop_97 + tmp_qloop_96);
+                   const real_t q_tmp_2_2 = tmp_qloop_62*(-tmp_qloop_70*tmp_qloop_95 + tmp_qloop_71 + tmp_qloop_72*tmp_qloop_97 + tmp_qloop_96);
+                   const real_t q_tmp_2_3 = tmp_qloop_62*(-tmp_qloop_75*tmp_qloop_95 + tmp_qloop_76 + tmp_qloop_77*tmp_qloop_97 + tmp_qloop_96);
+                   const real_t q_tmp_2_4 = tmp_qloop_62*(-tmp_qloop_80*tmp_qloop_95 + tmp_qloop_81 + tmp_qloop_82*tmp_qloop_97 + tmp_qloop_96);
+                   const real_t q_tmp_2_5 = tmp_qloop_62*(-tmp_qloop_85*tmp_qloop_95 + tmp_qloop_86 + tmp_qloop_87*tmp_qloop_97 + tmp_qloop_96);
+                   const real_t q_tmp_3_0 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_32 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_43 + tmp_qloop_41);
+                   const real_t q_tmp_3_1 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_65 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_67 + tmp_qloop_66);
+                   const real_t q_tmp_3_2 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_70 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_72 + tmp_qloop_71);
+                   const real_t q_tmp_3_3 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_75 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_3_4 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_80 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_82 + tmp_qloop_81);
+                   const real_t q_tmp_3_5 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_85 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_87 + tmp_qloop_86);
+                   const real_t q_tmp_4_0 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_32 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_43 + tmp_qloop_41);
+                   const real_t q_tmp_4_1 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_65 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_67 + tmp_qloop_66);
+                   const real_t q_tmp_4_2 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_70 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_72 + tmp_qloop_71);
+                   const real_t q_tmp_4_3 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_75 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_4_4 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_80 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_82 + tmp_qloop_81);
+                   const real_t q_tmp_4_5 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_85 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_87 + tmp_qloop_86);
+                   const real_t q_tmp_5_0 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_32 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_43 + tmp_qloop_41);
+                   const real_t q_tmp_5_1 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_65 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_67 + tmp_qloop_66);
+                   const real_t q_tmp_5_2 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_70 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_72 + tmp_qloop_71);
+                   const real_t q_tmp_5_3 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_75 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_5_4 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_80 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_82 + tmp_qloop_81);
+                   const real_t q_tmp_5_5 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_85 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_87 + tmp_qloop_86);
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                   q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                   q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                   q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                   q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                   q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                   q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                   q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_4);
+                   const __m256d tmp_qloop_8 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(tmp_qloop_5,tmp_qloop_9);
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_10)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16));
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_4);
+                   const __m256d tmp_qloop_19 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_10),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_10));
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_4),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13))),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_21 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_21,tmp_qloop_9));
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_8);
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_5),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_4),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_4),tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_24),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_26)));
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_27);
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q]));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_27);
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q]));
+                   const __m256d tmp_qloop_32 = _mm256_add_pd(tmp_qloop_29,tmp_qloop_31);
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_27);
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q]));
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,tmp_qloop_27),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q]));
+                   const __m256d tmp_qloop_37 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_38 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q])));
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_38,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_40 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q])));
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q])),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q])));
+                   const __m256d tmp_qloop_43 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q])));
+                   const __m256d tmp_qloop_47 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q])));
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_48);
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_50);
+                   const __m256d tmp_qloop_52 = _mm256_add_pd(tmp_qloop_49,tmp_qloop_51);
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_52)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53));
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_54);
+                   const __m256d tmp_qloop_56 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_50),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_48),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)));
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_52),_mm256_mul_pd(tmp_qloop_52,tmp_qloop_52)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_57,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_54);
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_57,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_50);
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q])),_mm256_mul_pd(mu_dof_1,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1]))),_mm256_mul_pd(mu_dof_2,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2]))),_mm256_mul_pd(mu_dof_3,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3]))),_mm256_mul_pd(mu_dof_4,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4]))),_mm256_mul_pd(mu_dof_5,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5]))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), 
_mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_59,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_49,tmp_qloop_60)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_51,tmp_qloop_58),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_60,tmp_qloop_61),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_59,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(tmp_qloop_58,tmp_qloop_61))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_65 = _mm256_add_pd(tmp_qloop_63,tmp_qloop_64);
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_64,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_68 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_70 = _mm256_add_pd(tmp_qloop_68,tmp_qloop_69);
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_68,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_72 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_73 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_74 = _mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_75 = _mm256_add_pd(tmp_qloop_73,tmp_qloop_74);
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_74,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_77 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_80 = _mm256_add_pd(tmp_qloop_78,tmp_qloop_79);
+                   const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_78,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_82 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_83 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5]));
+                   const __m256d tmp_qloop_84 = _mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5]));
+                   const __m256d tmp_qloop_85 = _mm256_add_pd(tmp_qloop_83,tmp_qloop_84);
+                   const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_84,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5]))),tmp_qloop_42);
+                   const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_90 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_88,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_89,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_38,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_88,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_89,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1])));
+                   const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_94 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_95 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_94,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_96 = _mm256_mul_pd(tmp_qloop_38,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_94,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_97 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2])));
+                   const __m256d tmp_qloop_98 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_99 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_100 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_98,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_99,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_101 = _mm256_mul_pd(tmp_qloop_38,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_98,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_99,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_102 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3])));
+                   const __m256d tmp_qloop_103 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_105 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_104,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_38,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_104,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_107 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4])));
+                   const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5]));
+                   const __m256d tmp_qloop_109 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5]));
+                   const __m256d tmp_qloop_110 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_108,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_111 = _mm256_mul_pd(tmp_qloop_38,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_108,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_112 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5])));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,tmp_qloop_47),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_32,tmp_qloop_37),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_39),tmp_qloop_41));
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_47,tmp_qloop_67),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_65),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_39),tmp_qloop_66));
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_47,tmp_qloop_72),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_70),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_39),tmp_qloop_71));
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_47,tmp_qloop_77),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_75),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_39),tmp_qloop_76));
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_47,tmp_qloop_82),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_80),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_39),tmp_qloop_81));
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_47,tmp_qloop_87),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_39),tmp_qloop_86));
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_32,tmp_qloop_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_41),tmp_qloop_91));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_66),tmp_qloop_91));
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_72,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_70,tmp_qloop_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_71),tmp_qloop_91));
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_75,tmp_qloop_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_76),tmp_qloop_91));
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_82,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_81),tmp_qloop_91));
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,tmp_qloop_90),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_86),tmp_qloop_91));
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,tmp_qloop_97),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_32,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_41),tmp_qloop_96));
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_97),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_65,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_66),tmp_qloop_96));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_72,tmp_qloop_97),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_70,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_71),tmp_qloop_96));
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_97),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_75,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_76),tmp_qloop_96));
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_82,tmp_qloop_97),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_81),tmp_qloop_96));
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_97),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_86),tmp_qloop_96));
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_43),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_100,tmp_qloop_32),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_101),tmp_qloop_41));
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_67),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_100,tmp_qloop_65),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_101),tmp_qloop_66));
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_72),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_100,tmp_qloop_70),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_101),tmp_qloop_71));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_77),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_100,tmp_qloop_75),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_101),tmp_qloop_76));
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_82),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_100,tmp_qloop_80),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_101),tmp_qloop_81));
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_87),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_100,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_101),tmp_qloop_86));
+                   const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_43),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_105,tmp_qloop_32),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_106),tmp_qloop_41));
+                   const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_67),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_105,tmp_qloop_65),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_106),tmp_qloop_66));
+                   const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_72),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_105,tmp_qloop_70),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_106),tmp_qloop_71));
+                   const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_77),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_105,tmp_qloop_75),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_106),tmp_qloop_76));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_82),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_105,tmp_qloop_80),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_106),tmp_qloop_81));
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_87),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_105,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_106),tmp_qloop_86));
+                   const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_43),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_110,tmp_qloop_32),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111),tmp_qloop_41));
+                   const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_67),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_110,tmp_qloop_65),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111),tmp_qloop_66));
+                   const __m256d q_tmp_5_2 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_72),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_110,tmp_qloop_70),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111),tmp_qloop_71));
+                   const __m256d q_tmp_5_3 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_77),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_110,tmp_qloop_75),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111),tmp_qloop_76));
+                   const __m256d q_tmp_5_4 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_82),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_110,tmp_qloop_80),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111),tmp_qloop_81));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_62,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_87),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_110,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111),tmp_qloop_86));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0);
+                   q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1);
+                   q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2);
+                   q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0);
+                   q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1);
+                   q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2);
+                   q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3);
+                   q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_0 = 0.0;
+                real_t q_acc_4_1 = 0.0;
+                real_t q_acc_4_2 = 0.0;
+                real_t q_acc_4_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_0 = 0.0;
+                real_t q_acc_5_1 = 0.0;
+                real_t q_acc_5_2 = 0.0;
+                real_t q_acc_5_3 = 0.0;
+                real_t q_acc_5_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_2 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_qloop_3 = p_affine_0_0 - p_affine_2_0;
+                const real_t tmp_qloop_6 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_7 = p_affine_0_1 - p_affine_2_1;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_4 = p_affine_0_0 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_5 = (tmp_qloop_4*tmp_qloop_4);
+                   const real_t tmp_qloop_8 = p_affine_0_1 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                   const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                   const real_t tmp_qloop_10 = tmp_qloop_5 + tmp_qloop_9;
+                   const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                   const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                   const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_4) - tmp_qloop_13*(-rayVertex_1 + tmp_qloop_8));
+                   const real_t tmp_qloop_21 = tmp_qloop_19*tmp_qloop_20*1.0;
+                   const real_t tmp_qloop_22 = tmp_qloop_1*tmp_qloop_18 + tmp_qloop_21*tmp_qloop_9;
+                   const real_t tmp_qloop_23 = tmp_qloop_17*tmp_qloop_8;
+                   const real_t tmp_qloop_24 = -tmp_qloop_13*tmp_qloop_23 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_5*1.0;
+                   const real_t tmp_qloop_25 = tmp_qloop_13*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_26 = tmp_qloop_1*tmp_qloop_23 - tmp_qloop_21*tmp_qloop_4*tmp_qloop_8;
+                   const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_22*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                   const real_t tmp_qloop_28 = tmp_qloop_22*tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q];
+                   const real_t tmp_qloop_30 = tmp_qloop_25*tmp_qloop_27;
+                   const real_t tmp_qloop_31 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q];
+                   const real_t tmp_qloop_32 = tmp_qloop_29 + tmp_qloop_31;
+                   const real_t tmp_qloop_33 = tmp_qloop_24*tmp_qloop_27;
+                   const real_t tmp_qloop_34 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q];
+                   const real_t tmp_qloop_35 = -tmp_qloop_26*tmp_qloop_27;
+                   const real_t tmp_qloop_36 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q];
+                   const real_t tmp_qloop_37 = tmp_qloop_34*0.66666666666666667 + tmp_qloop_36*0.66666666666666667;
+                   const real_t tmp_qloop_38 = tmp_qloop_33*2.0*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q] + tmp_qloop_35*2.0*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q];
+                   const real_t tmp_qloop_39 = tmp_qloop_38*(tmp_qloop_34*1.0 + tmp_qloop_36*1.0);
+                   const real_t tmp_qloop_40 = tmp_qloop_28*1.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q] + tmp_qloop_30*1.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q];
+                   const real_t tmp_qloop_41 = tmp_qloop_40*(tmp_qloop_29*2.0 + tmp_qloop_31*2.0);
+                   const real_t tmp_qloop_42 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q] + tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q];
+                   const real_t tmp_qloop_43 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_42;
+                   const real_t tmp_qloop_44 = tmp_qloop_28*0.5;
+                   const real_t tmp_qloop_45 = tmp_qloop_30*0.5;
+                   const real_t tmp_qloop_46 = tmp_qloop_33*0.5*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q] + tmp_qloop_35*0.5*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q];
+                   const real_t tmp_qloop_47 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_46*2.0;
+                   const real_t tmp_qloop_48 = -p_affine_0_0 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                   const real_t tmp_qloop_50 = -p_affine_0_1 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                   const real_t tmp_qloop_51 = (tmp_qloop_50*tmp_qloop_50);
+                   const real_t tmp_qloop_52 = tmp_qloop_49 + tmp_qloop_51;
+                   const real_t tmp_qloop_54 = pow(tmp_qloop_52, -0.50000000000000000)*tmp_qloop_53*1.0;
+                   const real_t tmp_qloop_55 = tmp_qloop_48*tmp_qloop_54;
+                   const real_t tmp_qloop_56 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_48) + tmp_qloop_12*(rayVertex_1 + tmp_qloop_50);
+                   const real_t tmp_qloop_57 = pow(tmp_qloop_52, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_58 = tmp_qloop_57*(radRayVertex + tmp_qloop_53*tmp_qloop_56);
+                   const real_t tmp_qloop_59 = tmp_qloop_50*tmp_qloop_54;
+                   const real_t tmp_qloop_60 = tmp_qloop_57*(radRayVertex + tmp_qloop_53*tmp_qloop_56);
+                   const real_t tmp_qloop_61 = tmp_qloop_48*tmp_qloop_50;
+                   const real_t tmp_qloop_62 = abs_det_jac_affine_BLUE*(mu_dof_0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*_data_phi_0_0_BLUE[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_55 - tmp_qloop_51*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_59 + tmp_qloop_49*tmp_qloop_60) - (tmp_qloop_0*tmp_qloop_59 + tmp_qloop_58*tmp_qloop_61)*(tmp_qloop_12*tmp_qloop_55 - tmp_qloop_60*tmp_qloop_61))*_data_q_w[q];
+                   const real_t tmp_qloop_63 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1];
+                   const real_t tmp_qloop_64 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1];
+                   const real_t tmp_qloop_65 = tmp_qloop_63 + tmp_qloop_64;
+                   const real_t tmp_qloop_66 = tmp_qloop_40*(tmp_qloop_63*2.0 + tmp_qloop_64*2.0);
+                   const real_t tmp_qloop_67 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_42;
+                   const real_t tmp_qloop_68 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2];
+                   const real_t tmp_qloop_69 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2];
+                   const real_t tmp_qloop_70 = tmp_qloop_68 + tmp_qloop_69;
+                   const real_t tmp_qloop_71 = tmp_qloop_40*(tmp_qloop_68*2.0 + tmp_qloop_69*2.0);
+                   const real_t tmp_qloop_72 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_42;
+                   const real_t tmp_qloop_73 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3];
+                   const real_t tmp_qloop_74 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3];
+                   const real_t tmp_qloop_75 = tmp_qloop_73 + tmp_qloop_74;
+                   const real_t tmp_qloop_76 = tmp_qloop_40*(tmp_qloop_73*2.0 + tmp_qloop_74*2.0);
+                   const real_t tmp_qloop_77 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_42;
+                   const real_t tmp_qloop_78 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4];
+                   const real_t tmp_qloop_79 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4];
+                   const real_t tmp_qloop_80 = tmp_qloop_78 + tmp_qloop_79;
+                   const real_t tmp_qloop_81 = tmp_qloop_40*(tmp_qloop_78*2.0 + tmp_qloop_79*2.0);
+                   const real_t tmp_qloop_82 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_42;
+                   const real_t tmp_qloop_83 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5];
+                   const real_t tmp_qloop_84 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5];
+                   const real_t tmp_qloop_85 = tmp_qloop_83 + tmp_qloop_84;
+                   const real_t tmp_qloop_86 = tmp_qloop_40*(tmp_qloop_83*2.0 + tmp_qloop_84*2.0);
+                   const real_t tmp_qloop_87 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_42;
+                   const real_t tmp_qloop_88 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1];
+                   const real_t tmp_qloop_89 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1];
+                   const real_t tmp_qloop_90 = tmp_qloop_88*0.66666666666666667 + tmp_qloop_89*0.66666666666666667;
+                   const real_t tmp_qloop_91 = tmp_qloop_38*(tmp_qloop_88*1.0 + tmp_qloop_89*1.0);
+                   const real_t tmp_qloop_92 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_46*2.0;
+                   const real_t tmp_qloop_93 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2];
+                   const real_t tmp_qloop_94 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2];
+                   const real_t tmp_qloop_95 = tmp_qloop_93*0.66666666666666667 + tmp_qloop_94*0.66666666666666667;
+                   const real_t tmp_qloop_96 = tmp_qloop_38*(tmp_qloop_93*1.0 + tmp_qloop_94*1.0);
+                   const real_t tmp_qloop_97 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_46*2.0;
+                   const real_t tmp_qloop_98 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3];
+                   const real_t tmp_qloop_99 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3];
+                   const real_t tmp_qloop_100 = tmp_qloop_98*0.66666666666666667 + tmp_qloop_99*0.66666666666666667;
+                   const real_t tmp_qloop_101 = tmp_qloop_38*(tmp_qloop_98*1.0 + tmp_qloop_99*1.0);
+                   const real_t tmp_qloop_102 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_46*2.0;
+                   const real_t tmp_qloop_103 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4];
+                   const real_t tmp_qloop_104 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4];
+                   const real_t tmp_qloop_105 = tmp_qloop_103*0.66666666666666667 + tmp_qloop_104*0.66666666666666667;
+                   const real_t tmp_qloop_106 = tmp_qloop_38*(tmp_qloop_103*1.0 + tmp_qloop_104*1.0);
+                   const real_t tmp_qloop_107 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_46*2.0;
+                   const real_t tmp_qloop_108 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5];
+                   const real_t tmp_qloop_109 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5];
+                   const real_t tmp_qloop_110 = tmp_qloop_108*0.66666666666666667 + tmp_qloop_109*0.66666666666666667;
+                   const real_t tmp_qloop_111 = tmp_qloop_38*(tmp_qloop_108*1.0 + tmp_qloop_109*1.0);
+                   const real_t tmp_qloop_112 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_46*2.0;
+                   const real_t q_tmp_0_0 = tmp_qloop_62*(-tmp_qloop_32*tmp_qloop_37 + tmp_qloop_39 + tmp_qloop_41 + tmp_qloop_43*tmp_qloop_47);
+                   const real_t q_tmp_0_1 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_65 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_67 + tmp_qloop_66);
+                   const real_t q_tmp_0_2 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_70 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_72 + tmp_qloop_71);
+                   const real_t q_tmp_0_3 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_75 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_0_4 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_80 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_82 + tmp_qloop_81);
+                   const real_t q_tmp_0_5 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_85 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_87 + tmp_qloop_86);
+                   const real_t q_tmp_1_0 = tmp_qloop_62*(-tmp_qloop_32*tmp_qloop_90 + tmp_qloop_41 + tmp_qloop_43*tmp_qloop_92 + tmp_qloop_91);
+                   const real_t q_tmp_1_1 = tmp_qloop_62*(-tmp_qloop_65*tmp_qloop_90 + tmp_qloop_66 + tmp_qloop_67*tmp_qloop_92 + tmp_qloop_91);
+                   const real_t q_tmp_1_2 = tmp_qloop_62*(-tmp_qloop_70*tmp_qloop_90 + tmp_qloop_71 + tmp_qloop_72*tmp_qloop_92 + tmp_qloop_91);
+                   const real_t q_tmp_1_3 = tmp_qloop_62*(-tmp_qloop_75*tmp_qloop_90 + tmp_qloop_76 + tmp_qloop_77*tmp_qloop_92 + tmp_qloop_91);
+                   const real_t q_tmp_1_4 = tmp_qloop_62*(-tmp_qloop_80*tmp_qloop_90 + tmp_qloop_81 + tmp_qloop_82*tmp_qloop_92 + tmp_qloop_91);
+                   const real_t q_tmp_1_5 = tmp_qloop_62*(-tmp_qloop_85*tmp_qloop_90 + tmp_qloop_86 + tmp_qloop_87*tmp_qloop_92 + tmp_qloop_91);
+                   const real_t q_tmp_2_0 = tmp_qloop_62*(-tmp_qloop_32*tmp_qloop_95 + tmp_qloop_41 + tmp_qloop_43*tmp_qloop_97 + tmp_qloop_96);
+                   const real_t q_tmp_2_1 = tmp_qloop_62*(-tmp_qloop_65*tmp_qloop_95 + tmp_qloop_66 + tmp_qloop_67*tmp_qloop_97 + tmp_qloop_96);
+                   const real_t q_tmp_2_2 = tmp_qloop_62*(-tmp_qloop_70*tmp_qloop_95 + tmp_qloop_71 + tmp_qloop_72*tmp_qloop_97 + tmp_qloop_96);
+                   const real_t q_tmp_2_3 = tmp_qloop_62*(-tmp_qloop_75*tmp_qloop_95 + tmp_qloop_76 + tmp_qloop_77*tmp_qloop_97 + tmp_qloop_96);
+                   const real_t q_tmp_2_4 = tmp_qloop_62*(-tmp_qloop_80*tmp_qloop_95 + tmp_qloop_81 + tmp_qloop_82*tmp_qloop_97 + tmp_qloop_96);
+                   const real_t q_tmp_2_5 = tmp_qloop_62*(-tmp_qloop_85*tmp_qloop_95 + tmp_qloop_86 + tmp_qloop_87*tmp_qloop_97 + tmp_qloop_96);
+                   const real_t q_tmp_3_0 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_32 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_43 + tmp_qloop_41);
+                   const real_t q_tmp_3_1 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_65 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_67 + tmp_qloop_66);
+                   const real_t q_tmp_3_2 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_70 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_72 + tmp_qloop_71);
+                   const real_t q_tmp_3_3 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_75 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_3_4 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_80 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_82 + tmp_qloop_81);
+                   const real_t q_tmp_3_5 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_85 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_87 + tmp_qloop_86);
+                   const real_t q_tmp_4_0 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_32 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_43 + tmp_qloop_41);
+                   const real_t q_tmp_4_1 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_65 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_67 + tmp_qloop_66);
+                   const real_t q_tmp_4_2 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_70 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_72 + tmp_qloop_71);
+                   const real_t q_tmp_4_3 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_75 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_4_4 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_80 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_82 + tmp_qloop_81);
+                   const real_t q_tmp_4_5 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_85 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_87 + tmp_qloop_86);
+                   const real_t q_tmp_5_0 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_32 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_43 + tmp_qloop_41);
+                   const real_t q_tmp_5_1 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_65 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_67 + tmp_qloop_66);
+                   const real_t q_tmp_5_2 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_70 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_72 + tmp_qloop_71);
+                   const real_t q_tmp_5_3 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_75 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_77 + tmp_qloop_76);
+                   const real_t q_tmp_5_4 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_80 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_82 + tmp_qloop_81);
+                   const real_t q_tmp_5_5 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_85 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_87 + tmp_qloop_86);
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                   q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                   q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                   q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                   q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                   q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                   q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                   q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c3def7c95547268ef404c9263bce8e7dd4428031
--- /dev/null
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp
@@ -0,0 +1,1221 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseFullStokesAnnulusMap_1_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseFullStokesAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (tmp_qloop_1*tmp_qloop_14 - tmp_qloop_11*tmp_qloop_13);
+       const real_t tmp_qloop_55 = tmp_qloop_15*1.0 / (-tmp_qloop_0*tmp_qloop_14 + tmp_qloop_11*tmp_qloop_12);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_5 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,tmp_qloop_7);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_4);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(tmp_qloop_8,tmp_qloop_9);
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_10)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16));
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_4);
+                   const __m256d tmp_qloop_19 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_10),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_10));
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_7),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_4),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1))),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_7);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(tmp_qloop_23,tmp_qloop_9));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_24),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_26)));
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_27);
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q]));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q]));
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_31);
+                   const __m256d tmp_qloop_33 = _mm256_add_pd(tmp_qloop_29,tmp_qloop_32);
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_27);
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q]));
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_27);
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q]));
+                   const __m256d tmp_qloop_38 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_30);
+                   const __m256d tmp_qloop_40 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q])));
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q])));
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_42,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_44 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q])));
+                   const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_31)),tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q])));
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q])));
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_50);
+                   const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_52,tmp_qloop_52);
+                   const __m256d tmp_qloop_54 = _mm256_add_pd(tmp_qloop_51,tmp_qloop_53);
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_54)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_55,tmp_qloop_55,tmp_qloop_55,tmp_qloop_55));
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_56);
+                   const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_52),_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_50),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)));
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_54),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_54)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(tmp_qloop_55,tmp_qloop_55,tmp_qloop_55,tmp_qloop_55)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_52,tmp_qloop_56);
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(tmp_qloop_55,tmp_qloop_55,tmp_qloop_55,tmp_qloop_55)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_52);
+                   const __m256d tmp_qloop_64 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q])),_mm256_mul_pd(mu_dof_1,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1]))),_mm256_mul_pd(mu_dof_2,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2]))),_mm256_mul_pd(mu_dof_3,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3]))),_mm256_mul_pd(mu_dof_4,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4]))),_mm256_mul_pd(mu_dof_5,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5]))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), 
_mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_62,tmp_qloop_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_60,tmp_qloop_63))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(tmp_qloop_51,tmp_qloop_62)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_53,tmp_qloop_60),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d tmp_qloop_65 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(tmp_qloop_65,tmp_qloop_66);
+                   const __m256d tmp_qloop_68 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_72 = _mm256_add_pd(tmp_qloop_70,tmp_qloop_71);
+                   const __m256d tmp_qloop_73 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_71,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_75 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_77 = _mm256_add_pd(tmp_qloop_75,tmp_qloop_76);
+                   const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_75,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_76,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_79 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_80 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_82 = _mm256_add_pd(tmp_qloop_80,tmp_qloop_81);
+                   const __m256d tmp_qloop_83 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_81,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_84 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5]));
+                   const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5]));
+                   const __m256d tmp_qloop_87 = _mm256_add_pd(tmp_qloop_85,tmp_qloop_86);
+                   const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_85,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_86,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_89 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_90 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_90,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_91,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_42,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_90,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_91,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_94 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1])));
+                   const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_96 = _mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_97 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_95,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_96,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_98 = _mm256_mul_pd(tmp_qloop_42,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_95,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_96,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_99 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2])));
+                   const __m256d tmp_qloop_100 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_101 = _mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_102 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_101,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_103 = _mm256_mul_pd(tmp_qloop_42,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_101,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_104 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3])));
+                   const __m256d tmp_qloop_105 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_107 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_106,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_42,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_106,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_109 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4])));
+                   const __m256d tmp_qloop_110 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5]));
+                   const __m256d tmp_qloop_111 = _mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5]));
+                   const __m256d tmp_qloop_112 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_113 = _mm256_mul_pd(tmp_qloop_42,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_114 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5])));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_49),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_33,tmp_qloop_38),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_41),tmp_qloop_43));
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_49,tmp_qloop_69),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_38,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_43),tmp_qloop_68));
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_49,tmp_qloop_74),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_38,tmp_qloop_72),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_43),tmp_qloop_73));
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_49,tmp_qloop_79),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_38,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_43),tmp_qloop_78));
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_49,tmp_qloop_84),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_38,tmp_qloop_82),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_43),tmp_qloop_83));
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_49,tmp_qloop_89),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_38,tmp_qloop_87),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_43),tmp_qloop_88));
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_94),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_33,tmp_qloop_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_41),tmp_qloop_93));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_94),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_68),tmp_qloop_93));
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_74,tmp_qloop_94),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_72,tmp_qloop_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_73),tmp_qloop_93));
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,tmp_qloop_94),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_78),tmp_qloop_93));
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_84,tmp_qloop_94),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,tmp_qloop_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_83),tmp_qloop_93));
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_89,tmp_qloop_94),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_88),tmp_qloop_93));
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_99),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_33,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_41),tmp_qloop_98));
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_99),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_68),tmp_qloop_98));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_74,tmp_qloop_99),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_72,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_73),tmp_qloop_98));
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,tmp_qloop_99),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_78),tmp_qloop_98));
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_84,tmp_qloop_99),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_83),tmp_qloop_98));
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_89,tmp_qloop_99),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_88),tmp_qloop_98));
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_104,tmp_qloop_45),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_33),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_103),tmp_qloop_41));
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_104,tmp_qloop_69),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_103),tmp_qloop_68));
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_104,tmp_qloop_74),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_72),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_103),tmp_qloop_73));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_104,tmp_qloop_79),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_103),tmp_qloop_78));
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_104,tmp_qloop_84),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_82),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_103),tmp_qloop_83));
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_104,tmp_qloop_89),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_87),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_103),tmp_qloop_88));
+                   const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,tmp_qloop_45),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_33),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_108),tmp_qloop_41));
+                   const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,tmp_qloop_69),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_108),tmp_qloop_68));
+                   const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,tmp_qloop_74),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_72),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_108),tmp_qloop_73));
+                   const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,tmp_qloop_79),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_108),tmp_qloop_78));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,tmp_qloop_84),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_82),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_108),tmp_qloop_83));
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,tmp_qloop_89),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_87),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_108),tmp_qloop_88));
+                   const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,tmp_qloop_45),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_33),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_113),tmp_qloop_41));
+                   const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,tmp_qloop_69),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_113),tmp_qloop_68));
+                   const __m256d q_tmp_5_2 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,tmp_qloop_74),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_72),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_113),tmp_qloop_73));
+                   const __m256d q_tmp_5_3 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,tmp_qloop_79),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_113),tmp_qloop_78));
+                   const __m256d q_tmp_5_4 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,tmp_qloop_84),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_82),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_113),tmp_qloop_83));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,tmp_qloop_89),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_87),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_113),tmp_qloop_88));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0);
+                   q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1);
+                   q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2);
+                   q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0);
+                   q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1);
+                   q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2);
+                   q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3);
+                   q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_0 = 0.0;
+                real_t q_acc_4_1 = 0.0;
+                real_t q_acc_4_2 = 0.0;
+                real_t q_acc_4_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_0 = 0.0;
+                real_t q_acc_5_1 = 0.0;
+                real_t q_acc_5_2 = 0.0;
+                real_t q_acc_5_3 = 0.0;
+                real_t q_acc_5_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                   const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                   const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                   const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                   const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                   const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                   const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                   const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                   const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                   const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                   const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                   const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                   const real_t tmp_qloop_28 = tmp_qloop_21*tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q];
+                   const real_t tmp_qloop_30 = -tmp_qloop_26;
+                   const real_t tmp_qloop_31 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q];
+                   const real_t tmp_qloop_32 = tmp_qloop_30*tmp_qloop_31;
+                   const real_t tmp_qloop_33 = tmp_qloop_29 + tmp_qloop_32;
+                   const real_t tmp_qloop_34 = tmp_qloop_24*tmp_qloop_27;
+                   const real_t tmp_qloop_35 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q];
+                   const real_t tmp_qloop_36 = tmp_qloop_25*tmp_qloop_27;
+                   const real_t tmp_qloop_37 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q];
+                   const real_t tmp_qloop_38 = tmp_qloop_35*0.66666666666666667 + tmp_qloop_37*0.66666666666666667;
+                   const real_t tmp_qloop_39 = tmp_qloop_27*tmp_qloop_30;
+                   const real_t tmp_qloop_40 = tmp_qloop_28*1.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q] + tmp_qloop_39*1.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q];
+                   const real_t tmp_qloop_41 = tmp_qloop_40*(tmp_qloop_29*2.0 + tmp_qloop_32*2.0);
+                   const real_t tmp_qloop_42 = tmp_qloop_34*2.0*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q] + tmp_qloop_36*2.0*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q];
+                   const real_t tmp_qloop_43 = tmp_qloop_42*(tmp_qloop_35*1.0 + tmp_qloop_37*1.0);
+                   const real_t tmp_qloop_44 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q];
+                   const real_t tmp_qloop_45 = tmp_qloop_24*tmp_qloop_31 + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_44;
+                   const real_t tmp_qloop_46 = tmp_qloop_28*0.5;
+                   const real_t tmp_qloop_47 = tmp_qloop_39*0.5;
+                   const real_t tmp_qloop_48 = tmp_qloop_34*0.5*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q] + tmp_qloop_36*0.5*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q];
+                   const real_t tmp_qloop_49 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_48*2.0;
+                   const real_t tmp_qloop_50 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_51 = (tmp_qloop_50*tmp_qloop_50);
+                   const real_t tmp_qloop_52 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_53 = (tmp_qloop_52*tmp_qloop_52);
+                   const real_t tmp_qloop_54 = tmp_qloop_51 + tmp_qloop_53;
+                   const real_t tmp_qloop_56 = pow(tmp_qloop_54, -0.50000000000000000)*tmp_qloop_55*1.0;
+                   const real_t tmp_qloop_57 = tmp_qloop_50*tmp_qloop_56;
+                   const real_t tmp_qloop_58 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_52) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_50);
+                   const real_t tmp_qloop_59 = pow(tmp_qloop_54, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_60 = tmp_qloop_59*(radRayVertex + tmp_qloop_55*tmp_qloop_58);
+                   const real_t tmp_qloop_61 = tmp_qloop_52*tmp_qloop_56;
+                   const real_t tmp_qloop_62 = tmp_qloop_59*(radRayVertex + tmp_qloop_55*tmp_qloop_58);
+                   const real_t tmp_qloop_63 = tmp_qloop_50*tmp_qloop_52;
+                   const real_t tmp_qloop_64 = abs_det_jac_affine_GRAY*(mu_dof_0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*_data_phi_0_0_GRAY[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_57 - tmp_qloop_62*tmp_qloop_63)*(tmp_qloop_12*tmp_qloop_61 + tmp_qloop_60*tmp_qloop_63) - (tmp_qloop_0*tmp_qloop_61 + tmp_qloop_51*tmp_qloop_62)*(tmp_qloop_12*tmp_qloop_57 - tmp_qloop_53*tmp_qloop_60))*_data_q_w[q];
+                   const real_t tmp_qloop_65 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1];
+                   const real_t tmp_qloop_66 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1];
+                   const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66;
+                   const real_t tmp_qloop_68 = tmp_qloop_40*(tmp_qloop_65*2.0 + tmp_qloop_66*2.0);
+                   const real_t tmp_qloop_69 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_44;
+                   const real_t tmp_qloop_70 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2];
+                   const real_t tmp_qloop_71 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2];
+                   const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71;
+                   const real_t tmp_qloop_73 = tmp_qloop_40*(tmp_qloop_70*2.0 + tmp_qloop_71*2.0);
+                   const real_t tmp_qloop_74 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_44;
+                   const real_t tmp_qloop_75 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3];
+                   const real_t tmp_qloop_76 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3];
+                   const real_t tmp_qloop_77 = tmp_qloop_75 + tmp_qloop_76;
+                   const real_t tmp_qloop_78 = tmp_qloop_40*(tmp_qloop_75*2.0 + tmp_qloop_76*2.0);
+                   const real_t tmp_qloop_79 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_44;
+                   const real_t tmp_qloop_80 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4];
+                   const real_t tmp_qloop_81 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4];
+                   const real_t tmp_qloop_82 = tmp_qloop_80 + tmp_qloop_81;
+                   const real_t tmp_qloop_83 = tmp_qloop_40*(tmp_qloop_80*2.0 + tmp_qloop_81*2.0);
+                   const real_t tmp_qloop_84 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_44;
+                   const real_t tmp_qloop_85 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5];
+                   const real_t tmp_qloop_86 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5];
+                   const real_t tmp_qloop_87 = tmp_qloop_85 + tmp_qloop_86;
+                   const real_t tmp_qloop_88 = tmp_qloop_40*(tmp_qloop_85*2.0 + tmp_qloop_86*2.0);
+                   const real_t tmp_qloop_89 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_44;
+                   const real_t tmp_qloop_90 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1];
+                   const real_t tmp_qloop_91 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1];
+                   const real_t tmp_qloop_92 = tmp_qloop_90*0.66666666666666667 + tmp_qloop_91*0.66666666666666667;
+                   const real_t tmp_qloop_93 = tmp_qloop_42*(tmp_qloop_90*1.0 + tmp_qloop_91*1.0);
+                   const real_t tmp_qloop_94 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_48*2.0;
+                   const real_t tmp_qloop_95 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2];
+                   const real_t tmp_qloop_96 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2];
+                   const real_t tmp_qloop_97 = tmp_qloop_95*0.66666666666666667 + tmp_qloop_96*0.66666666666666667;
+                   const real_t tmp_qloop_98 = tmp_qloop_42*(tmp_qloop_95*1.0 + tmp_qloop_96*1.0);
+                   const real_t tmp_qloop_99 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_48*2.0;
+                   const real_t tmp_qloop_100 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3];
+                   const real_t tmp_qloop_101 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3];
+                   const real_t tmp_qloop_102 = tmp_qloop_100*0.66666666666666667 + tmp_qloop_101*0.66666666666666667;
+                   const real_t tmp_qloop_103 = tmp_qloop_42*(tmp_qloop_100*1.0 + tmp_qloop_101*1.0);
+                   const real_t tmp_qloop_104 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_48*2.0;
+                   const real_t tmp_qloop_105 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4];
+                   const real_t tmp_qloop_106 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4];
+                   const real_t tmp_qloop_107 = tmp_qloop_105*0.66666666666666667 + tmp_qloop_106*0.66666666666666667;
+                   const real_t tmp_qloop_108 = tmp_qloop_42*(tmp_qloop_105*1.0 + tmp_qloop_106*1.0);
+                   const real_t tmp_qloop_109 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_48*2.0;
+                   const real_t tmp_qloop_110 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5];
+                   const real_t tmp_qloop_111 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5];
+                   const real_t tmp_qloop_112 = tmp_qloop_110*0.66666666666666667 + tmp_qloop_111*0.66666666666666667;
+                   const real_t tmp_qloop_113 = tmp_qloop_42*(tmp_qloop_110*1.0 + tmp_qloop_111*1.0);
+                   const real_t tmp_qloop_114 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_48*2.0;
+                   const real_t q_tmp_0_0 = tmp_qloop_64*(-tmp_qloop_33*tmp_qloop_38 + tmp_qloop_41 + tmp_qloop_43 + tmp_qloop_45*tmp_qloop_49);
+                   const real_t q_tmp_0_1 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_67 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_69 + tmp_qloop_68);
+                   const real_t q_tmp_0_2 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_72 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_74 + tmp_qloop_73);
+                   const real_t q_tmp_0_3 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_77 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_0_4 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_82 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_84 + tmp_qloop_83);
+                   const real_t q_tmp_0_5 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_87 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_89 + tmp_qloop_88);
+                   const real_t q_tmp_1_0 = tmp_qloop_64*(-tmp_qloop_33*tmp_qloop_92 + tmp_qloop_41 + tmp_qloop_45*tmp_qloop_94 + tmp_qloop_93);
+                   const real_t q_tmp_1_1 = tmp_qloop_64*(-tmp_qloop_67*tmp_qloop_92 + tmp_qloop_68 + tmp_qloop_69*tmp_qloop_94 + tmp_qloop_93);
+                   const real_t q_tmp_1_2 = tmp_qloop_64*(-tmp_qloop_72*tmp_qloop_92 + tmp_qloop_73 + tmp_qloop_74*tmp_qloop_94 + tmp_qloop_93);
+                   const real_t q_tmp_1_3 = tmp_qloop_64*(-tmp_qloop_77*tmp_qloop_92 + tmp_qloop_78 + tmp_qloop_79*tmp_qloop_94 + tmp_qloop_93);
+                   const real_t q_tmp_1_4 = tmp_qloop_64*(-tmp_qloop_82*tmp_qloop_92 + tmp_qloop_83 + tmp_qloop_84*tmp_qloop_94 + tmp_qloop_93);
+                   const real_t q_tmp_1_5 = tmp_qloop_64*(-tmp_qloop_87*tmp_qloop_92 + tmp_qloop_88 + tmp_qloop_89*tmp_qloop_94 + tmp_qloop_93);
+                   const real_t q_tmp_2_0 = tmp_qloop_64*(-tmp_qloop_33*tmp_qloop_97 + tmp_qloop_41 + tmp_qloop_45*tmp_qloop_99 + tmp_qloop_98);
+                   const real_t q_tmp_2_1 = tmp_qloop_64*(-tmp_qloop_67*tmp_qloop_97 + tmp_qloop_68 + tmp_qloop_69*tmp_qloop_99 + tmp_qloop_98);
+                   const real_t q_tmp_2_2 = tmp_qloop_64*(-tmp_qloop_72*tmp_qloop_97 + tmp_qloop_73 + tmp_qloop_74*tmp_qloop_99 + tmp_qloop_98);
+                   const real_t q_tmp_2_3 = tmp_qloop_64*(-tmp_qloop_77*tmp_qloop_97 + tmp_qloop_78 + tmp_qloop_79*tmp_qloop_99 + tmp_qloop_98);
+                   const real_t q_tmp_2_4 = tmp_qloop_64*(-tmp_qloop_82*tmp_qloop_97 + tmp_qloop_83 + tmp_qloop_84*tmp_qloop_99 + tmp_qloop_98);
+                   const real_t q_tmp_2_5 = tmp_qloop_64*(-tmp_qloop_87*tmp_qloop_97 + tmp_qloop_88 + tmp_qloop_89*tmp_qloop_99 + tmp_qloop_98);
+                   const real_t q_tmp_3_0 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_33 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_45 + tmp_qloop_41);
+                   const real_t q_tmp_3_1 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_67 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_69 + tmp_qloop_68);
+                   const real_t q_tmp_3_2 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_72 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_74 + tmp_qloop_73);
+                   const real_t q_tmp_3_3 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_77 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_3_4 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_82 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_84 + tmp_qloop_83);
+                   const real_t q_tmp_3_5 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_87 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_89 + tmp_qloop_88);
+                   const real_t q_tmp_4_0 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_33 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_45 + tmp_qloop_41);
+                   const real_t q_tmp_4_1 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_67 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_69 + tmp_qloop_68);
+                   const real_t q_tmp_4_2 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_72 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_74 + tmp_qloop_73);
+                   const real_t q_tmp_4_3 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_77 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_4_4 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_82 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_84 + tmp_qloop_83);
+                   const real_t q_tmp_4_5 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_87 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_89 + tmp_qloop_88);
+                   const real_t q_tmp_5_0 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_33 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_45 + tmp_qloop_41);
+                   const real_t q_tmp_5_1 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_67 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_69 + tmp_qloop_68);
+                   const real_t q_tmp_5_2 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_72 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_74 + tmp_qloop_73);
+                   const real_t q_tmp_5_3 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_77 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_5_4 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_82 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_84 + tmp_qloop_83);
+                   const real_t q_tmp_5_5 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_87 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_89 + tmp_qloop_88);
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                   q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                   q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                   q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                   q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                   q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                   q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                   q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_5 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,tmp_qloop_7);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_4);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(tmp_qloop_8,tmp_qloop_9);
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_10)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16));
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_4);
+                   const __m256d tmp_qloop_19 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_10),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_10));
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_7),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_4),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1))),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_22 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_7);
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(tmp_qloop_23,tmp_qloop_9));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_4),tmp_qloop_7),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_24),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_26)));
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_27);
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q]));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q]));
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_31);
+                   const __m256d tmp_qloop_33 = _mm256_add_pd(tmp_qloop_29,tmp_qloop_32);
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_27);
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q]));
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_27);
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q]));
+                   const __m256d tmp_qloop_38 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_30);
+                   const __m256d tmp_qloop_40 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q])));
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q])));
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_42,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_44 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q])),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q])));
+                   const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_31)),tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q])));
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q])));
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_5,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_50);
+                   const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_52,tmp_qloop_52);
+                   const __m256d tmp_qloop_54 = _mm256_add_pd(tmp_qloop_51,tmp_qloop_53);
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_54)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_55,tmp_qloop_55,tmp_qloop_55,tmp_qloop_55));
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_56);
+                   const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_52),_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_50),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)));
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_54),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_54)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(tmp_qloop_55,tmp_qloop_55,tmp_qloop_55,tmp_qloop_55)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_52,tmp_qloop_56);
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(tmp_qloop_55,tmp_qloop_55,tmp_qloop_55,tmp_qloop_55)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_52);
+                   const __m256d tmp_qloop_64 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q])),_mm256_mul_pd(mu_dof_1,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1]))),_mm256_mul_pd(mu_dof_2,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2]))),_mm256_mul_pd(mu_dof_3,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3]))),_mm256_mul_pd(mu_dof_4,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4]))),_mm256_mul_pd(mu_dof_5,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5]))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), 
_mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_62,tmp_qloop_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_60,tmp_qloop_63))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(tmp_qloop_51,tmp_qloop_62)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_53,tmp_qloop_60),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d tmp_qloop_65 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(tmp_qloop_65,tmp_qloop_66);
+                   const __m256d tmp_qloop_68 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_72 = _mm256_add_pd(tmp_qloop_70,tmp_qloop_71);
+                   const __m256d tmp_qloop_73 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_71,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_75 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_77 = _mm256_add_pd(tmp_qloop_75,tmp_qloop_76);
+                   const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_75,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_76,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_79 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_80 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_82 = _mm256_add_pd(tmp_qloop_80,tmp_qloop_81);
+                   const __m256d tmp_qloop_83 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_81,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_84 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5]));
+                   const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5]));
+                   const __m256d tmp_qloop_87 = _mm256_add_pd(tmp_qloop_85,tmp_qloop_86);
+                   const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_40,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_85,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_86,_mm256_set_pd(2.0,2.0,2.0,2.0))));
+                   const __m256d tmp_qloop_89 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5]))),tmp_qloop_44);
+                   const __m256d tmp_qloop_90 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_90,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_91,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_42,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_90,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_91,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_94 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1])));
+                   const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_96 = _mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_97 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_95,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_96,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_98 = _mm256_mul_pd(tmp_qloop_42,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_95,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_96,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_99 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2])));
+                   const __m256d tmp_qloop_100 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_101 = _mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_102 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_101,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_103 = _mm256_mul_pd(tmp_qloop_42,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_101,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_104 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3])));
+                   const __m256d tmp_qloop_105 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_107 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_106,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_42,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_106,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_109 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4])));
+                   const __m256d tmp_qloop_110 = _mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5]));
+                   const __m256d tmp_qloop_111 = _mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5]));
+                   const __m256d tmp_qloop_112 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_113 = _mm256_mul_pd(tmp_qloop_42,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(1.0,1.0,1.0,1.0))));
+                   const __m256d tmp_qloop_114 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5])));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_49),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_33,tmp_qloop_38),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_41),tmp_qloop_43));
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_49,tmp_qloop_69),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_38,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_43),tmp_qloop_68));
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_49,tmp_qloop_74),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_38,tmp_qloop_72),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_43),tmp_qloop_73));
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_49,tmp_qloop_79),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_38,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_43),tmp_qloop_78));
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_49,tmp_qloop_84),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_38,tmp_qloop_82),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_43),tmp_qloop_83));
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_49,tmp_qloop_89),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_38,tmp_qloop_87),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_43),tmp_qloop_88));
+                   const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_94),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_33,tmp_qloop_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_41),tmp_qloop_93));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_94),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_68),tmp_qloop_93));
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_74,tmp_qloop_94),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_72,tmp_qloop_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_73),tmp_qloop_93));
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,tmp_qloop_94),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_78),tmp_qloop_93));
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_84,tmp_qloop_94),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,tmp_qloop_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_83),tmp_qloop_93));
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_89,tmp_qloop_94),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_88),tmp_qloop_93));
+                   const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_99),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_33,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_41),tmp_qloop_98));
+                   const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,tmp_qloop_99),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_68),tmp_qloop_98));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_74,tmp_qloop_99),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_72,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_73),tmp_qloop_98));
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,tmp_qloop_99),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_78),tmp_qloop_98));
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_84,tmp_qloop_99),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_83),tmp_qloop_98));
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_89,tmp_qloop_99),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_88),tmp_qloop_98));
+                   const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_104,tmp_qloop_45),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_33),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_103),tmp_qloop_41));
+                   const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_104,tmp_qloop_69),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_103),tmp_qloop_68));
+                   const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_104,tmp_qloop_74),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_72),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_103),tmp_qloop_73));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_104,tmp_qloop_79),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_103),tmp_qloop_78));
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_104,tmp_qloop_84),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_82),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_103),tmp_qloop_83));
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_104,tmp_qloop_89),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_87),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_103),tmp_qloop_88));
+                   const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,tmp_qloop_45),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_33),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_108),tmp_qloop_41));
+                   const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,tmp_qloop_69),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_108),tmp_qloop_68));
+                   const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,tmp_qloop_74),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_72),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_108),tmp_qloop_73));
+                   const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,tmp_qloop_79),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_108),tmp_qloop_78));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,tmp_qloop_84),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_82),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_108),tmp_qloop_83));
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,tmp_qloop_89),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_87),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_108),tmp_qloop_88));
+                   const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,tmp_qloop_45),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_33),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_113),tmp_qloop_41));
+                   const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,tmp_qloop_69),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_113),tmp_qloop_68));
+                   const __m256d q_tmp_5_2 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,tmp_qloop_74),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_72),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_113),tmp_qloop_73));
+                   const __m256d q_tmp_5_3 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,tmp_qloop_79),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_113),tmp_qloop_78));
+                   const __m256d q_tmp_5_4 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,tmp_qloop_84),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_82),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_113),tmp_qloop_83));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_64,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,tmp_qloop_89),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_112,tmp_qloop_87),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_113),tmp_qloop_88));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0);
+                   q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0);
+                   q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1);
+                   q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0);
+                   q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1);
+                   q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2);
+                   q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0);
+                   q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1);
+                   q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2);
+                   q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3);
+                   q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_0 = 0.0;
+                real_t q_acc_2_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_0 = 0.0;
+                real_t q_acc_3_1 = 0.0;
+                real_t q_acc_3_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_0 = 0.0;
+                real_t q_acc_4_1 = 0.0;
+                real_t q_acc_4_2 = 0.0;
+                real_t q_acc_4_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_0 = 0.0;
+                real_t q_acc_5_1 = 0.0;
+                real_t q_acc_5_2 = 0.0;
+                real_t q_acc_5_3 = 0.0;
+                real_t q_acc_5_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                   const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                   const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                   const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                   const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                   const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                   const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                   const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                   const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                   const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                   const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                   const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                   const real_t tmp_qloop_28 = tmp_qloop_21*tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q];
+                   const real_t tmp_qloop_30 = -tmp_qloop_26;
+                   const real_t tmp_qloop_31 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q];
+                   const real_t tmp_qloop_32 = tmp_qloop_30*tmp_qloop_31;
+                   const real_t tmp_qloop_33 = tmp_qloop_29 + tmp_qloop_32;
+                   const real_t tmp_qloop_34 = tmp_qloop_24*tmp_qloop_27;
+                   const real_t tmp_qloop_35 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q];
+                   const real_t tmp_qloop_36 = tmp_qloop_25*tmp_qloop_27;
+                   const real_t tmp_qloop_37 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q];
+                   const real_t tmp_qloop_38 = tmp_qloop_35*0.66666666666666667 + tmp_qloop_37*0.66666666666666667;
+                   const real_t tmp_qloop_39 = tmp_qloop_27*tmp_qloop_30;
+                   const real_t tmp_qloop_40 = tmp_qloop_28*1.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q] + tmp_qloop_39*1.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q];
+                   const real_t tmp_qloop_41 = tmp_qloop_40*(tmp_qloop_29*2.0 + tmp_qloop_32*2.0);
+                   const real_t tmp_qloop_42 = tmp_qloop_34*2.0*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q] + tmp_qloop_36*2.0*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q];
+                   const real_t tmp_qloop_43 = tmp_qloop_42*(tmp_qloop_35*1.0 + tmp_qloop_37*1.0);
+                   const real_t tmp_qloop_44 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q];
+                   const real_t tmp_qloop_45 = tmp_qloop_24*tmp_qloop_31 + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_44;
+                   const real_t tmp_qloop_46 = tmp_qloop_28*0.5;
+                   const real_t tmp_qloop_47 = tmp_qloop_39*0.5;
+                   const real_t tmp_qloop_48 = tmp_qloop_34*0.5*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q] + tmp_qloop_36*0.5*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q];
+                   const real_t tmp_qloop_49 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_48*2.0;
+                   const real_t tmp_qloop_50 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                   const real_t tmp_qloop_51 = (tmp_qloop_50*tmp_qloop_50);
+                   const real_t tmp_qloop_52 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_53 = (tmp_qloop_52*tmp_qloop_52);
+                   const real_t tmp_qloop_54 = tmp_qloop_51 + tmp_qloop_53;
+                   const real_t tmp_qloop_56 = pow(tmp_qloop_54, -0.50000000000000000)*tmp_qloop_55*1.0;
+                   const real_t tmp_qloop_57 = tmp_qloop_50*tmp_qloop_56;
+                   const real_t tmp_qloop_58 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_52) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_50);
+                   const real_t tmp_qloop_59 = pow(tmp_qloop_54, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_60 = tmp_qloop_59*(radRayVertex + tmp_qloop_55*tmp_qloop_58);
+                   const real_t tmp_qloop_61 = tmp_qloop_52*tmp_qloop_56;
+                   const real_t tmp_qloop_62 = tmp_qloop_59*(radRayVertex + tmp_qloop_55*tmp_qloop_58);
+                   const real_t tmp_qloop_63 = tmp_qloop_50*tmp_qloop_52;
+                   const real_t tmp_qloop_64 = abs_det_jac_affine_BLUE*(mu_dof_0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*_data_phi_0_0_BLUE[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_57 - tmp_qloop_62*tmp_qloop_63)*(tmp_qloop_12*tmp_qloop_61 + tmp_qloop_60*tmp_qloop_63) - (tmp_qloop_0*tmp_qloop_61 + tmp_qloop_51*tmp_qloop_62)*(tmp_qloop_12*tmp_qloop_57 - tmp_qloop_53*tmp_qloop_60))*_data_q_w[q];
+                   const real_t tmp_qloop_65 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1];
+                   const real_t tmp_qloop_66 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1];
+                   const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66;
+                   const real_t tmp_qloop_68 = tmp_qloop_40*(tmp_qloop_65*2.0 + tmp_qloop_66*2.0);
+                   const real_t tmp_qloop_69 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_44;
+                   const real_t tmp_qloop_70 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2];
+                   const real_t tmp_qloop_71 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2];
+                   const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71;
+                   const real_t tmp_qloop_73 = tmp_qloop_40*(tmp_qloop_70*2.0 + tmp_qloop_71*2.0);
+                   const real_t tmp_qloop_74 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_44;
+                   const real_t tmp_qloop_75 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3];
+                   const real_t tmp_qloop_76 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3];
+                   const real_t tmp_qloop_77 = tmp_qloop_75 + tmp_qloop_76;
+                   const real_t tmp_qloop_78 = tmp_qloop_40*(tmp_qloop_75*2.0 + tmp_qloop_76*2.0);
+                   const real_t tmp_qloop_79 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_44;
+                   const real_t tmp_qloop_80 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4];
+                   const real_t tmp_qloop_81 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4];
+                   const real_t tmp_qloop_82 = tmp_qloop_80 + tmp_qloop_81;
+                   const real_t tmp_qloop_83 = tmp_qloop_40*(tmp_qloop_80*2.0 + tmp_qloop_81*2.0);
+                   const real_t tmp_qloop_84 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_44;
+                   const real_t tmp_qloop_85 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5];
+                   const real_t tmp_qloop_86 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5];
+                   const real_t tmp_qloop_87 = tmp_qloop_85 + tmp_qloop_86;
+                   const real_t tmp_qloop_88 = tmp_qloop_40*(tmp_qloop_85*2.0 + tmp_qloop_86*2.0);
+                   const real_t tmp_qloop_89 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_44;
+                   const real_t tmp_qloop_90 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1];
+                   const real_t tmp_qloop_91 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1];
+                   const real_t tmp_qloop_92 = tmp_qloop_90*0.66666666666666667 + tmp_qloop_91*0.66666666666666667;
+                   const real_t tmp_qloop_93 = tmp_qloop_42*(tmp_qloop_90*1.0 + tmp_qloop_91*1.0);
+                   const real_t tmp_qloop_94 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_48*2.0;
+                   const real_t tmp_qloop_95 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2];
+                   const real_t tmp_qloop_96 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2];
+                   const real_t tmp_qloop_97 = tmp_qloop_95*0.66666666666666667 + tmp_qloop_96*0.66666666666666667;
+                   const real_t tmp_qloop_98 = tmp_qloop_42*(tmp_qloop_95*1.0 + tmp_qloop_96*1.0);
+                   const real_t tmp_qloop_99 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_48*2.0;
+                   const real_t tmp_qloop_100 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3];
+                   const real_t tmp_qloop_101 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3];
+                   const real_t tmp_qloop_102 = tmp_qloop_100*0.66666666666666667 + tmp_qloop_101*0.66666666666666667;
+                   const real_t tmp_qloop_103 = tmp_qloop_42*(tmp_qloop_100*1.0 + tmp_qloop_101*1.0);
+                   const real_t tmp_qloop_104 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_48*2.0;
+                   const real_t tmp_qloop_105 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4];
+                   const real_t tmp_qloop_106 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4];
+                   const real_t tmp_qloop_107 = tmp_qloop_105*0.66666666666666667 + tmp_qloop_106*0.66666666666666667;
+                   const real_t tmp_qloop_108 = tmp_qloop_42*(tmp_qloop_105*1.0 + tmp_qloop_106*1.0);
+                   const real_t tmp_qloop_109 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_48*2.0;
+                   const real_t tmp_qloop_110 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5];
+                   const real_t tmp_qloop_111 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5];
+                   const real_t tmp_qloop_112 = tmp_qloop_110*0.66666666666666667 + tmp_qloop_111*0.66666666666666667;
+                   const real_t tmp_qloop_113 = tmp_qloop_42*(tmp_qloop_110*1.0 + tmp_qloop_111*1.0);
+                   const real_t tmp_qloop_114 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_48*2.0;
+                   const real_t q_tmp_0_0 = tmp_qloop_64*(-tmp_qloop_33*tmp_qloop_38 + tmp_qloop_41 + tmp_qloop_43 + tmp_qloop_45*tmp_qloop_49);
+                   const real_t q_tmp_0_1 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_67 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_69 + tmp_qloop_68);
+                   const real_t q_tmp_0_2 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_72 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_74 + tmp_qloop_73);
+                   const real_t q_tmp_0_3 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_77 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_0_4 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_82 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_84 + tmp_qloop_83);
+                   const real_t q_tmp_0_5 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_87 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_89 + tmp_qloop_88);
+                   const real_t q_tmp_1_0 = tmp_qloop_64*(-tmp_qloop_33*tmp_qloop_92 + tmp_qloop_41 + tmp_qloop_45*tmp_qloop_94 + tmp_qloop_93);
+                   const real_t q_tmp_1_1 = tmp_qloop_64*(-tmp_qloop_67*tmp_qloop_92 + tmp_qloop_68 + tmp_qloop_69*tmp_qloop_94 + tmp_qloop_93);
+                   const real_t q_tmp_1_2 = tmp_qloop_64*(-tmp_qloop_72*tmp_qloop_92 + tmp_qloop_73 + tmp_qloop_74*tmp_qloop_94 + tmp_qloop_93);
+                   const real_t q_tmp_1_3 = tmp_qloop_64*(-tmp_qloop_77*tmp_qloop_92 + tmp_qloop_78 + tmp_qloop_79*tmp_qloop_94 + tmp_qloop_93);
+                   const real_t q_tmp_1_4 = tmp_qloop_64*(-tmp_qloop_82*tmp_qloop_92 + tmp_qloop_83 + tmp_qloop_84*tmp_qloop_94 + tmp_qloop_93);
+                   const real_t q_tmp_1_5 = tmp_qloop_64*(-tmp_qloop_87*tmp_qloop_92 + tmp_qloop_88 + tmp_qloop_89*tmp_qloop_94 + tmp_qloop_93);
+                   const real_t q_tmp_2_0 = tmp_qloop_64*(-tmp_qloop_33*tmp_qloop_97 + tmp_qloop_41 + tmp_qloop_45*tmp_qloop_99 + tmp_qloop_98);
+                   const real_t q_tmp_2_1 = tmp_qloop_64*(-tmp_qloop_67*tmp_qloop_97 + tmp_qloop_68 + tmp_qloop_69*tmp_qloop_99 + tmp_qloop_98);
+                   const real_t q_tmp_2_2 = tmp_qloop_64*(-tmp_qloop_72*tmp_qloop_97 + tmp_qloop_73 + tmp_qloop_74*tmp_qloop_99 + tmp_qloop_98);
+                   const real_t q_tmp_2_3 = tmp_qloop_64*(-tmp_qloop_77*tmp_qloop_97 + tmp_qloop_78 + tmp_qloop_79*tmp_qloop_99 + tmp_qloop_98);
+                   const real_t q_tmp_2_4 = tmp_qloop_64*(-tmp_qloop_82*tmp_qloop_97 + tmp_qloop_83 + tmp_qloop_84*tmp_qloop_99 + tmp_qloop_98);
+                   const real_t q_tmp_2_5 = tmp_qloop_64*(-tmp_qloop_87*tmp_qloop_97 + tmp_qloop_88 + tmp_qloop_89*tmp_qloop_99 + tmp_qloop_98);
+                   const real_t q_tmp_3_0 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_33 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_45 + tmp_qloop_41);
+                   const real_t q_tmp_3_1 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_67 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_69 + tmp_qloop_68);
+                   const real_t q_tmp_3_2 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_72 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_74 + tmp_qloop_73);
+                   const real_t q_tmp_3_3 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_77 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_3_4 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_82 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_84 + tmp_qloop_83);
+                   const real_t q_tmp_3_5 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_87 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_89 + tmp_qloop_88);
+                   const real_t q_tmp_4_0 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_33 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_45 + tmp_qloop_41);
+                   const real_t q_tmp_4_1 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_67 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_69 + tmp_qloop_68);
+                   const real_t q_tmp_4_2 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_72 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_74 + tmp_qloop_73);
+                   const real_t q_tmp_4_3 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_77 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_4_4 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_82 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_84 + tmp_qloop_83);
+                   const real_t q_tmp_4_5 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_87 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_89 + tmp_qloop_88);
+                   const real_t q_tmp_5_0 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_33 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_45 + tmp_qloop_41);
+                   const real_t q_tmp_5_1 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_67 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_69 + tmp_qloop_68);
+                   const real_t q_tmp_5_2 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_72 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_74 + tmp_qloop_73);
+                   const real_t q_tmp_5_3 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_77 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_79 + tmp_qloop_78);
+                   const real_t q_tmp_5_4 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_82 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_84 + tmp_qloop_83);
+                   const real_t q_tmp_5_5 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_87 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_89 + tmp_qloop_88);
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                   q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                   q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                   q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                   q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                   q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                   q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                   q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                   q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                   q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                   q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+                const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..817e554839644fe749719b3c9c7d2418b4010163
--- /dev/null
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp
@@ -0,0 +1,1011 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseFullStokesAnnulusMap_1_1.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseFullStokesAnnulusMap_1_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (-tmp_qloop_1*tmp_qloop_11 + tmp_qloop_13*tmp_qloop_14);
+       const real_t tmp_qloop_50 = tmp_qloop_15*1.0 / (tmp_qloop_0*tmp_qloop_11 - tmp_qloop_12*tmp_qloop_14);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_4);
+                   const __m256d tmp_qloop_8 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(tmp_qloop_5,tmp_qloop_9);
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_10)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16));
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_4);
+                   const __m256d tmp_qloop_19 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_10),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_10));
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_4),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13))),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_21 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_21,tmp_qloop_9));
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_8);
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_5),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_4),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_4),tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_24),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_26)));
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_27);
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q]));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q]));
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q]));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_27);
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q]));
+                   const __m256d tmp_qloop_35 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_27);
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,tmp_qloop_27),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q]))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q]))));
+                   const __m256d tmp_qloop_39 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_40 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q])),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q])));
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_43 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q])));
+                   const __m256d tmp_qloop_44 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q])));
+                   const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_45,tmp_qloop_45);
+                   const __m256d tmp_qloop_47 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,tmp_qloop_47);
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(tmp_qloop_46,tmp_qloop_48);
+                   const __m256d tmp_qloop_51 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_49)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_50,tmp_qloop_50,tmp_qloop_50,tmp_qloop_50));
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_45,tmp_qloop_51);
+                   const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_47),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_45),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)));
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_49),_mm256_mul_pd(tmp_qloop_49,tmp_qloop_49)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_54,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,_mm256_set_pd(tmp_qloop_50,tmp_qloop_50,tmp_qloop_50,tmp_qloop_50)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_47,tmp_qloop_51);
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_54,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,_mm256_set_pd(tmp_qloop_50,tmp_qloop_50,tmp_qloop_50,tmp_qloop_50)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_45,tmp_qloop_47);
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q])),_mm256_mul_pd(mu_dof_1,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1]))),_mm256_mul_pd(mu_dof_2,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2]))),_mm256_mul_pd(mu_dof_3,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3]))),_mm256_mul_pd(mu_dof_4,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4]))),_mm256_mul_pd(mu_dof_5,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5]))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), 
_mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_46,tmp_qloop_57)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_55),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_57,tmp_qloop_58),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(tmp_qloop_55,tmp_qloop_58))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_62 = _mm256_add_pd(tmp_qloop_60,tmp_qloop_61);
+                   const __m256d tmp_qloop_63 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_60,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(2.0,2.0,2.0,2.0)));
+                   const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1]))),tmp_qloop_40);
+                   const __m256d tmp_qloop_65 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(tmp_qloop_65,tmp_qloop_66);
+                   const __m256d tmp_qloop_68 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(2.0,2.0,2.0,2.0)));
+                   const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2]))),tmp_qloop_40);
+                   const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_72 = _mm256_add_pd(tmp_qloop_70,tmp_qloop_71);
+                   const __m256d tmp_qloop_73 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_71,_mm256_set_pd(2.0,2.0,2.0,2.0)));
+                   const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3]))),tmp_qloop_40);
+                   const __m256d tmp_qloop_75 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_77 = _mm256_add_pd(tmp_qloop_75,tmp_qloop_76);
+                   const __m256d tmp_qloop_78 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_75,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_76,_mm256_set_pd(2.0,2.0,2.0,2.0)));
+                   const __m256d tmp_qloop_79 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4]))),tmp_qloop_40);
+                   const __m256d tmp_qloop_80 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5]));
+                   const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5]));
+                   const __m256d tmp_qloop_82 = _mm256_add_pd(tmp_qloop_80,tmp_qloop_81);
+                   const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_81,_mm256_set_pd(2.0,2.0,2.0,2.0)));
+                   const __m256d tmp_qloop_84 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5]))),tmp_qloop_40);
+                   const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_85,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_86,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_88 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_85,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_86,_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_89 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1])));
+                   const __m256d tmp_qloop_90 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_90,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_91,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_93 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_90,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_91,_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_94 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2])));
+                   const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_96 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_97 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_95,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_96,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_98 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_95,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_96,_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_99 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3])));
+                   const __m256d tmp_qloop_100 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_101 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_102 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_101,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_103 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_101,_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_104 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4])));
+                   const __m256d tmp_qloop_105 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5]));
+                   const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5]));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(2.0,2.0,2.0,2.0)))),_mm256_mul_pd(tmp_qloop_44,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_30)),tmp_qloop_40))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_add_pd(tmp_qloop_29,tmp_qloop_31)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,tmp_qloop_63),_mm256_mul_pd(tmp_qloop_44,tmp_qloop_64)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_35,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,tmp_qloop_68),_mm256_mul_pd(tmp_qloop_44,tmp_qloop_69)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_35,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,tmp_qloop_73),_mm256_mul_pd(tmp_qloop_44,tmp_qloop_74)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_35,tmp_qloop_72),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,tmp_qloop_78),_mm256_mul_pd(tmp_qloop_44,tmp_qloop_79)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_35,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,tmp_qloop_83),_mm256_mul_pd(tmp_qloop_44,tmp_qloop_84)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_35,tmp_qloop_82),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_64,tmp_qloop_89)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_62,tmp_qloop_87),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_68,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_69,tmp_qloop_89)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_87),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_74,tmp_qloop_89)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_72,tmp_qloop_87),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_78,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_79,tmp_qloop_89)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_87),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_84,tmp_qloop_89)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,tmp_qloop_87),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_68,tmp_qloop_93),_mm256_mul_pd(tmp_qloop_69,tmp_qloop_94)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_93),_mm256_mul_pd(tmp_qloop_74,tmp_qloop_94)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_72,tmp_qloop_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_78,tmp_qloop_93),_mm256_mul_pd(tmp_qloop_79,tmp_qloop_94)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_93),_mm256_mul_pd(tmp_qloop_84,tmp_qloop_94)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,tmp_qloop_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_98),_mm256_mul_pd(tmp_qloop_74,tmp_qloop_99)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_72,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_78,tmp_qloop_98),_mm256_mul_pd(tmp_qloop_79,tmp_qloop_99)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_98),_mm256_mul_pd(tmp_qloop_84,tmp_qloop_99)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_103,tmp_qloop_78),_mm256_mul_pd(tmp_qloop_104,tmp_qloop_79)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_103,tmp_qloop_83),_mm256_mul_pd(tmp_qloop_104,tmp_qloop_84)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_82),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_106,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_add_pd(tmp_qloop_105,tmp_qloop_106)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_84,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5]))),tmp_qloop_43)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_38));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_1,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_2,src_dof_0),_mm256_mul_pd(q_acc_1_2,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_3,src_dof_0),_mm256_mul_pd(q_acc_1_3,src_dof_1)),_mm256_mul_pd(q_acc_2_3,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_4,src_dof_0),_mm256_mul_pd(q_acc_1_4,src_dof_1)),_mm256_mul_pd(q_acc_2_4,src_dof_2)),_mm256_mul_pd(q_acc_3_4,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_5,src_dof_0),_mm256_mul_pd(q_acc_1_5,src_dof_1)),_mm256_mul_pd(q_acc_2_5,src_dof_2)),_mm256_mul_pd(q_acc_3_5,src_dof_3)),_mm256_mul_pd(q_acc_4_5,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) // scalar loop: starts at (micro_edges_per_macro_edge - ctr_1) truncated to a multiple of 4 and steps by 1; presumably the remainder of the preceding __m256d loop - confirm
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0; // ctr_0 - phantom_ctr_0 is always 0 here, so only element [0] of the two arrays below is ever read
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; // p_affine_i_j: component j of point i, built from the macro vertex coordinates and the (ctr_0, ctr_1) counters scaled by 1/micro_edges_per_macro_edge_float
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; // point 1: point 0 advanced by one step in the ctr_0 direction
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; // point 2: point 0 advanced by one step in the ctr_1 direction
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // six src values: src_dof_0..2 from _data_srcVertex, src_dof_3..5 from _data_srcEdge
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // mu values are read at the same indices as the src values, from _data_muVertex / _data_muEdge
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0; // 21 accumulators q_acc_i_j with i <= j; the entries with i > j are never stored (see elMatVec_* below)
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_2 = p_affine_0_0 - p_affine_1_0; // point differences that do not depend on q, computed once before the q loop
+                const real_t tmp_qloop_3 = p_affine_0_0 - p_affine_2_0;
+                const real_t tmp_qloop_6 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_7 = p_affine_0_1 - p_affine_2_1;
+                for (int64_t q = 0; q < 4; q += 1) // 4 iterations; q indexes _data_q_p_0, _data_q_p_1, _data_q_w and the *_GRAY tables
+                {
+                   const real_t tmp_qloop_4 = p_affine_0_0 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q]; // component 0 of the point p_affine_0 + q_p_0*(p_affine_1 - p_affine_0) + q_p_1*(p_affine_2 - p_affine_0)
+                   const real_t tmp_qloop_5 = (tmp_qloop_4*tmp_qloop_4);
+                   const real_t tmp_qloop_8 = p_affine_0_1 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q]; // component 1 of the same point
+                   const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                   const real_t tmp_qloop_10 = tmp_qloop_5 + tmp_qloop_9; // squared Euclidean norm of that point; raised to negative powers below, so it must be non-zero
+                   const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0; // tmp_qloop_16, tmp_qloop_1, tmp_qloop_13 (and tmp_qloop_0, tmp_qloop_12, tmp_qloop_50 further down) are defined outside this loop
+                   const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                   const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                   const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_4) - tmp_qloop_13*(-rayVertex_1 + tmp_qloop_8));
+                   const real_t tmp_qloop_21 = tmp_qloop_19*tmp_qloop_20*1.0;
+                   const real_t tmp_qloop_22 = tmp_qloop_1*tmp_qloop_18 + tmp_qloop_21*tmp_qloop_9;
+                   const real_t tmp_qloop_23 = tmp_qloop_17*tmp_qloop_8;
+                   const real_t tmp_qloop_24 = -tmp_qloop_13*tmp_qloop_23 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_5*1.0;
+                   const real_t tmp_qloop_25 = tmp_qloop_13*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_26 = tmp_qloop_1*tmp_qloop_23 - tmp_qloop_21*tmp_qloop_4*tmp_qloop_8;
+                   const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_22*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26); // reciprocal of a 2x2-determinant-like expression; no guard against a zero denominator
+                   const real_t tmp_qloop_28 = tmp_qloop_22*tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q];
+                   const real_t tmp_qloop_30 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q];
+                   const real_t tmp_qloop_31 = tmp_qloop_25*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q];
+                   const real_t tmp_qloop_33 = tmp_qloop_25*tmp_qloop_27;
+                   const real_t tmp_qloop_34 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q];
+                   const real_t tmp_qloop_35 = tmp_qloop_32*0.66666666666666667 + tmp_qloop_34*0.66666666666666667;
+                   const real_t tmp_qloop_36 = tmp_qloop_24*tmp_qloop_27;
+                   const real_t tmp_qloop_37 = -tmp_qloop_26*tmp_qloop_27;
+                   const real_t tmp_qloop_38 = (tmp_qloop_36*1.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q] + tmp_qloop_37*1.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q])*(tmp_qloop_36*2.0*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q] + tmp_qloop_37*2.0*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q]); // the *_0_0_GRAY / *_1_0_GRAY tables are indexed with [q], the *_0_1_GRAY / *_1_1_GRAY tables with [6*q + k]; this term is added unchanged to every q_tmp_i_j
+                   const real_t tmp_qloop_39 = tmp_qloop_32*1.0 + tmp_qloop_34*1.0;
+                   const real_t tmp_qloop_40 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q] + tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q];
+                   const real_t tmp_qloop_41 = tmp_qloop_36*0.5;
+                   const real_t tmp_qloop_42 = tmp_qloop_37*0.5;
+                   const real_t tmp_qloop_43 = tmp_qloop_28*0.5*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q] + tmp_qloop_33*0.5*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q];
+                   const real_t tmp_qloop_44 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_43*2.0;
+                   const real_t tmp_qloop_45 = -p_affine_0_0 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q]; // equals -tmp_qloop_4
+                   const real_t tmp_qloop_46 = (tmp_qloop_45*tmp_qloop_45);
+                   const real_t tmp_qloop_47 = -p_affine_0_1 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q]; // equals -tmp_qloop_8
+                   const real_t tmp_qloop_48 = (tmp_qloop_47*tmp_qloop_47);
+                   const real_t tmp_qloop_49 = tmp_qloop_46 + tmp_qloop_48;
+                   const real_t tmp_qloop_51 = pow(tmp_qloop_49, -0.50000000000000000)*tmp_qloop_50*1.0;
+                   const real_t tmp_qloop_52 = tmp_qloop_45*tmp_qloop_51;
+                   const real_t tmp_qloop_53 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_45) + tmp_qloop_12*(rayVertex_1 + tmp_qloop_47);
+                   const real_t tmp_qloop_54 = pow(tmp_qloop_49, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_55 = tmp_qloop_54*(radRayVertex + tmp_qloop_50*tmp_qloop_53);
+                   const real_t tmp_qloop_56 = tmp_qloop_47*tmp_qloop_51;
+                   const real_t tmp_qloop_57 = tmp_qloop_54*(radRayVertex + tmp_qloop_50*tmp_qloop_53); // same expression as tmp_qloop_55
+                   const real_t tmp_qloop_58 = tmp_qloop_45*tmp_qloop_47;
+                   const real_t tmp_qloop_59 = abs_det_jac_affine_GRAY*(mu_dof_0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*_data_phi_0_0_GRAY[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_52 - tmp_qloop_48*tmp_qloop_55)*(tmp_qloop_12*tmp_qloop_56 + tmp_qloop_46*tmp_qloop_57) - (tmp_qloop_0*tmp_qloop_56 + tmp_qloop_55*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_52 - tmp_qloop_57*tmp_qloop_58))*_data_q_w[q]; // common factor of every q_tmp_i_j: abs_det_jac_affine_GRAY * (mu dofs weighted by _data_phi_0_0_GRAY) * abs(...) * _data_q_w[q]; NOTE(review): unqualified abs on a real_t argument relies on a floating-point overload being in scope - confirm
+                   const real_t tmp_qloop_60 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1]; // tmp_qloop_60..84: the same five-line pattern repeated for table offsets 6*q + 1 .. 6*q + 5 of the grad_phi tables
+                   const real_t tmp_qloop_61 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1];
+                   const real_t tmp_qloop_62 = tmp_qloop_60 + tmp_qloop_61;
+                   const real_t tmp_qloop_63 = tmp_qloop_60*2.0 + tmp_qloop_61*2.0;
+                   const real_t tmp_qloop_64 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_40;
+                   const real_t tmp_qloop_65 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2];
+                   const real_t tmp_qloop_66 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2];
+                   const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66;
+                   const real_t tmp_qloop_68 = tmp_qloop_65*2.0 + tmp_qloop_66*2.0;
+                   const real_t tmp_qloop_69 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_40;
+                   const real_t tmp_qloop_70 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3];
+                   const real_t tmp_qloop_71 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3];
+                   const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71;
+                   const real_t tmp_qloop_73 = tmp_qloop_70*2.0 + tmp_qloop_71*2.0;
+                   const real_t tmp_qloop_74 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_40;
+                   const real_t tmp_qloop_75 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4];
+                   const real_t tmp_qloop_76 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4];
+                   const real_t tmp_qloop_77 = tmp_qloop_75 + tmp_qloop_76;
+                   const real_t tmp_qloop_78 = tmp_qloop_75*2.0 + tmp_qloop_76*2.0;
+                   const real_t tmp_qloop_79 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_40;
+                   const real_t tmp_qloop_80 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5];
+                   const real_t tmp_qloop_81 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5];
+                   const real_t tmp_qloop_82 = tmp_qloop_80 + tmp_qloop_81;
+                   const real_t tmp_qloop_83 = tmp_qloop_80*2.0 + tmp_qloop_81*2.0;
+                   const real_t tmp_qloop_84 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_40;
+                   const real_t tmp_qloop_85 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1]; // tmp_qloop_85..106: the analogous pattern for the grad_psi tables at offsets 6*q + 1 .. 6*q + 5
+                   const real_t tmp_qloop_86 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1];
+                   const real_t tmp_qloop_87 = tmp_qloop_85*0.66666666666666667 + tmp_qloop_86*0.66666666666666667;
+                   const real_t tmp_qloop_88 = tmp_qloop_85*1.0 + tmp_qloop_86*1.0;
+                   const real_t tmp_qloop_89 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_43*2.0;
+                   const real_t tmp_qloop_90 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2];
+                   const real_t tmp_qloop_91 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2];
+                   const real_t tmp_qloop_92 = tmp_qloop_90*0.66666666666666667 + tmp_qloop_91*0.66666666666666667;
+                   const real_t tmp_qloop_93 = tmp_qloop_90*1.0 + tmp_qloop_91*1.0;
+                   const real_t tmp_qloop_94 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_43*2.0;
+                   const real_t tmp_qloop_95 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3];
+                   const real_t tmp_qloop_96 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3];
+                   const real_t tmp_qloop_97 = tmp_qloop_95*0.66666666666666667 + tmp_qloop_96*0.66666666666666667;
+                   const real_t tmp_qloop_98 = tmp_qloop_95*1.0 + tmp_qloop_96*1.0;
+                   const real_t tmp_qloop_99 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_43*2.0;
+                   const real_t tmp_qloop_100 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4];
+                   const real_t tmp_qloop_101 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4];
+                   const real_t tmp_qloop_102 = tmp_qloop_100*0.66666666666666667 + tmp_qloop_101*0.66666666666666667;
+                   const real_t tmp_qloop_103 = tmp_qloop_100*1.0 + tmp_qloop_101*1.0;
+                   const real_t tmp_qloop_104 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_43*2.0;
+                   const real_t tmp_qloop_105 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5];
+                   const real_t tmp_qloop_106 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5];
+                   const real_t q_tmp_0_0 = tmp_qloop_59*(-tmp_qloop_35*(tmp_qloop_29 + tmp_qloop_31) + tmp_qloop_38 + tmp_qloop_39*(tmp_qloop_29*2.0 + tmp_qloop_31*2.0) + tmp_qloop_44*(tmp_qloop_24*tmp_qloop_30 + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_40)); // q_tmp_i_j: the contribution of this q to q_acc_i_j; every entry is scaled by tmp_qloop_59
+                   const real_t q_tmp_0_1 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_62 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_63 + tmp_qloop_44*tmp_qloop_64);
+                   const real_t q_tmp_0_2 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_67 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_68 + tmp_qloop_44*tmp_qloop_69);
+                   const real_t q_tmp_0_3 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_72 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_73 + tmp_qloop_44*tmp_qloop_74);
+                   const real_t q_tmp_0_4 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_77 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_78 + tmp_qloop_44*tmp_qloop_79);
+                   const real_t q_tmp_0_5 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_82 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_83 + tmp_qloop_44*tmp_qloop_84);
+                   const real_t q_tmp_1_1 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_62*tmp_qloop_87 + tmp_qloop_63*tmp_qloop_88 + tmp_qloop_64*tmp_qloop_89);
+                   const real_t q_tmp_1_2 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_67*tmp_qloop_87 + tmp_qloop_68*tmp_qloop_88 + tmp_qloop_69*tmp_qloop_89);
+                   const real_t q_tmp_1_3 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_72*tmp_qloop_87 + tmp_qloop_73*tmp_qloop_88 + tmp_qloop_74*tmp_qloop_89);
+                   const real_t q_tmp_1_4 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_77*tmp_qloop_87 + tmp_qloop_78*tmp_qloop_88 + tmp_qloop_79*tmp_qloop_89);
+                   const real_t q_tmp_1_5 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_82*tmp_qloop_87 + tmp_qloop_83*tmp_qloop_88 + tmp_qloop_84*tmp_qloop_89);
+                   const real_t q_tmp_2_2 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_67*tmp_qloop_92 + tmp_qloop_68*tmp_qloop_93 + tmp_qloop_69*tmp_qloop_94);
+                   const real_t q_tmp_2_3 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_72*tmp_qloop_92 + tmp_qloop_73*tmp_qloop_93 + tmp_qloop_74*tmp_qloop_94);
+                   const real_t q_tmp_2_4 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_77*tmp_qloop_92 + tmp_qloop_78*tmp_qloop_93 + tmp_qloop_79*tmp_qloop_94);
+                   const real_t q_tmp_2_5 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_82*tmp_qloop_92 + tmp_qloop_83*tmp_qloop_93 + tmp_qloop_84*tmp_qloop_94);
+                   const real_t q_tmp_3_3 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_72*tmp_qloop_97 + tmp_qloop_73*tmp_qloop_98 + tmp_qloop_74*tmp_qloop_99);
+                   const real_t q_tmp_3_4 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_77*tmp_qloop_97 + tmp_qloop_78*tmp_qloop_98 + tmp_qloop_79*tmp_qloop_99);
+                   const real_t q_tmp_3_5 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_82*tmp_qloop_97 + tmp_qloop_83*tmp_qloop_98 + tmp_qloop_84*tmp_qloop_99);
+                   const real_t q_tmp_4_4 = tmp_qloop_59*(-tmp_qloop_102*tmp_qloop_77 + tmp_qloop_103*tmp_qloop_78 + tmp_qloop_104*tmp_qloop_79 + tmp_qloop_38);
+                   const real_t q_tmp_4_5 = tmp_qloop_59*(-tmp_qloop_102*tmp_qloop_82 + tmp_qloop_103*tmp_qloop_83 + tmp_qloop_104*tmp_qloop_84 + tmp_qloop_38);
+                   const real_t q_tmp_5_5 = tmp_qloop_59*(tmp_qloop_38 + tmp_qloop_82*(tmp_qloop_105 + tmp_qloop_106)*-0.66666666666666667 + tmp_qloop_83*(tmp_qloop_105*1.0 + tmp_qloop_106*1.0) + tmp_qloop_84*(tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5] + tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_43)*2.0);
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; // sum the per-q contributions into the accumulators
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; // 6x6 matrix-vector product with the src values; row i reuses q_acc_j_i for j < i, i.e. the accumulated matrix is applied as symmetric
+                const real_t elMatVec_1 = q_acc_0_1*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_0_2*src_dof_0 + q_acc_1_2*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_0_3*src_dof_0 + q_acc_1_3*src_dof_1 + q_acc_2_3*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_0_4*src_dof_0 + q_acc_1_4*src_dof_1 + q_acc_2_4*src_dof_2 + q_acc_3_4*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_0_5*src_dof_0 + q_acc_1_5*src_dof_1 + q_acc_2_5*src_dof_2 + q_acc_3_5*src_dof_3 + q_acc_4_5*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // results are added onto the existing dst values, at the same indices the matching src_dof_i was read from
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // 1 / micro_edges_per_macro_edge_float: scale applied to the macro-vertex differences below
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); // comp0 of macro vertex 0 moved one scaled step towards macro vertex 1
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); // comp1 of the same point
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); // comp0 of one scaled step from macro vertex 0 towards macro vertex 2
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); // comp1 of that step
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; // point 0 = macro vertex 0 + step towards vertex 1
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; // point 1 = macro vertex 0 + step towards vertex 2
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; // point 2 = macro vertex 0 + both steps
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; // 2x2 matrix jac_affine: column 0 = point 1 - point 0, column 1 = point 2 - point 0
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; // determinant of jac_affine
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); // reciprocal of the determinant; no guard against a zero determinant
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; // inverse of jac_affine via the 2x2 adjugate divided by the determinant
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); // absolute value of the determinant; NOTE(review): unqualified abs on a real_t argument relies on a floating-point overload being in scope - confirm
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_4);
+                   const __m256d tmp_qloop_8 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(tmp_qloop_5,tmp_qloop_9);
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_10)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16));
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_4);
+                   const __m256d tmp_qloop_19 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_10),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_10));
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_4),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13))),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_21 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_21,tmp_qloop_9));
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_8);
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_5),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_4),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_4),tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_24),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_26)));
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_27);
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q]));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q]));
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q]));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_27);
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q]));
+                   const __m256d tmp_qloop_35 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_27);
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,tmp_qloop_27),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_38 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q]))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q]))));
+                   const __m256d tmp_qloop_39 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_40 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q])),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q])));
+                   const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_43 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q])));
+                   const __m256d tmp_qloop_44 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q])));
+                   const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_45,tmp_qloop_45);
+                   const __m256d tmp_qloop_47 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,tmp_qloop_47);
+                   const __m256d tmp_qloop_49 = _mm256_add_pd(tmp_qloop_46,tmp_qloop_48);
+                   const __m256d tmp_qloop_51 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_49)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_50,tmp_qloop_50,tmp_qloop_50,tmp_qloop_50));
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_45,tmp_qloop_51);
+                   const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_47),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_45),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)));
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_49),_mm256_mul_pd(tmp_qloop_49,tmp_qloop_49)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_54,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,_mm256_set_pd(tmp_qloop_50,tmp_qloop_50,tmp_qloop_50,tmp_qloop_50)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_47,tmp_qloop_51);
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_54,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,_mm256_set_pd(tmp_qloop_50,tmp_qloop_50,tmp_qloop_50,tmp_qloop_50)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_45,tmp_qloop_47);
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q])),_mm256_mul_pd(mu_dof_1,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1]))),_mm256_mul_pd(mu_dof_2,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2]))),_mm256_mul_pd(mu_dof_3,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3]))),_mm256_mul_pd(mu_dof_4,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4]))),_mm256_mul_pd(mu_dof_5,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5]))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), 
_mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_46,tmp_qloop_57)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_55),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_57,tmp_qloop_58),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(tmp_qloop_55,tmp_qloop_58))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_62 = _mm256_add_pd(tmp_qloop_60,tmp_qloop_61);
+                   const __m256d tmp_qloop_63 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_60,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(2.0,2.0,2.0,2.0)));
+                   const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1]))),tmp_qloop_40);
+                   const __m256d tmp_qloop_65 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_67 = _mm256_add_pd(tmp_qloop_65,tmp_qloop_66);
+                   const __m256d tmp_qloop_68 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(2.0,2.0,2.0,2.0)));
+                   const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2]))),tmp_qloop_40);
+                   const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_72 = _mm256_add_pd(tmp_qloop_70,tmp_qloop_71);
+                   const __m256d tmp_qloop_73 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_71,_mm256_set_pd(2.0,2.0,2.0,2.0)));
+                   const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3]))),tmp_qloop_40);
+                   const __m256d tmp_qloop_75 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_77 = _mm256_add_pd(tmp_qloop_75,tmp_qloop_76);
+                   const __m256d tmp_qloop_78 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_75,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_76,_mm256_set_pd(2.0,2.0,2.0,2.0)));
+                   const __m256d tmp_qloop_79 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4]))),tmp_qloop_40);
+                   const __m256d tmp_qloop_80 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5]));
+                   const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5]));
+                   const __m256d tmp_qloop_82 = _mm256_add_pd(tmp_qloop_80,tmp_qloop_81);
+                   const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_81,_mm256_set_pd(2.0,2.0,2.0,2.0)));
+                   const __m256d tmp_qloop_84 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5]))),tmp_qloop_40);
+                   const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_85,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_86,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_88 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_85,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_86,_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_89 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1])));
+                   const __m256d tmp_qloop_90 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_90,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_91,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_93 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_90,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_91,_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_94 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2])));
+                   const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_96 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_97 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_95,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_96,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_98 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_95,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_96,_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_99 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3])));
+                   const __m256d tmp_qloop_100 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_101 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_102 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)),_mm256_mul_pd(tmp_qloop_101,_mm256_set_pd(0.66666666666666667,0.66666666666666667,0.66666666666666667,0.66666666666666667)));
+                   const __m256d tmp_qloop_103 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_101,_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_104 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4]))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4])));
+                   const __m256d tmp_qloop_105 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5]));
+                   const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5]));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(2.0,2.0,2.0,2.0)))),_mm256_mul_pd(tmp_qloop_44,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_30)),tmp_qloop_40))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_add_pd(tmp_qloop_29,tmp_qloop_31)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,tmp_qloop_63),_mm256_mul_pd(tmp_qloop_44,tmp_qloop_64)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_35,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,tmp_qloop_68),_mm256_mul_pd(tmp_qloop_44,tmp_qloop_69)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_35,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,tmp_qloop_73),_mm256_mul_pd(tmp_qloop_44,tmp_qloop_74)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_35,tmp_qloop_72),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,tmp_qloop_78),_mm256_mul_pd(tmp_qloop_44,tmp_qloop_79)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_35,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,tmp_qloop_83),_mm256_mul_pd(tmp_qloop_44,tmp_qloop_84)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_35,tmp_qloop_82),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_64,tmp_qloop_89)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_62,tmp_qloop_87),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_68,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_69,tmp_qloop_89)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_87),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_74,tmp_qloop_89)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_72,tmp_qloop_87),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_78,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_79,tmp_qloop_89)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_87),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_84,tmp_qloop_89)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,tmp_qloop_87),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_68,tmp_qloop_93),_mm256_mul_pd(tmp_qloop_69,tmp_qloop_94)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_67,tmp_qloop_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_93),_mm256_mul_pd(tmp_qloop_74,tmp_qloop_94)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_72,tmp_qloop_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_78,tmp_qloop_93),_mm256_mul_pd(tmp_qloop_79,tmp_qloop_94)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_93),_mm256_mul_pd(tmp_qloop_84,tmp_qloop_94)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,tmp_qloop_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,tmp_qloop_98),_mm256_mul_pd(tmp_qloop_74,tmp_qloop_99)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_72,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_78,tmp_qloop_98),_mm256_mul_pd(tmp_qloop_79,tmp_qloop_99)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_98),_mm256_mul_pd(tmp_qloop_84,tmp_qloop_99)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_103,tmp_qloop_78),_mm256_mul_pd(tmp_qloop_104,tmp_qloop_79)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_103,tmp_qloop_83),_mm256_mul_pd(tmp_qloop_104,tmp_qloop_84)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_82),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_59,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_106,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_add_pd(tmp_qloop_105,tmp_qloop_106)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_84,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5]))),tmp_qloop_43)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_38));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3);
+                   q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4);
+                   q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3);
+                   q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4);
+                   q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3);
+                   q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4);
+                   q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4);
+                   q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_1,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_2,src_dof_0),_mm256_mul_pd(q_acc_1_2,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_3,src_dof_0),_mm256_mul_pd(q_acc_1_3,src_dof_1)),_mm256_mul_pd(q_acc_2_3,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_4,src_dof_0),_mm256_mul_pd(q_acc_1_4,src_dof_1)),_mm256_mul_pd(q_acc_2_4,src_dof_2)),_mm256_mul_pd(q_acc_3_4,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_5,src_dof_0),_mm256_mul_pd(q_acc_1_5,src_dof_1)),_mm256_mul_pd(q_acc_2_5,src_dof_2)),_mm256_mul_pd(q_acc_3_5,src_dof_3)),_mm256_mul_pd(q_acc_4_5,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_0_3 = 0.0;
+                real_t q_acc_0_4 = 0.0;
+                real_t q_acc_0_5 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_1_3 = 0.0;
+                real_t q_acc_1_4 = 0.0;
+                real_t q_acc_1_5 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_2_3 = 0.0;
+                real_t q_acc_2_4 = 0.0;
+                real_t q_acc_2_5 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_3_4 = 0.0;
+                real_t q_acc_3_5 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_4_5 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_2 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_qloop_3 = p_affine_0_0 - p_affine_2_0;
+                const real_t tmp_qloop_6 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_7 = p_affine_0_1 - p_affine_2_1;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_4 = p_affine_0_0 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_5 = (tmp_qloop_4*tmp_qloop_4);
+                   const real_t tmp_qloop_8 = p_affine_0_1 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                   const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                   const real_t tmp_qloop_10 = tmp_qloop_5 + tmp_qloop_9;
+                   const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                   const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                   const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_4) - tmp_qloop_13*(-rayVertex_1 + tmp_qloop_8));
+                   const real_t tmp_qloop_21 = tmp_qloop_19*tmp_qloop_20*1.0;
+                   const real_t tmp_qloop_22 = tmp_qloop_1*tmp_qloop_18 + tmp_qloop_21*tmp_qloop_9;
+                   const real_t tmp_qloop_23 = tmp_qloop_17*tmp_qloop_8;
+                   const real_t tmp_qloop_24 = -tmp_qloop_13*tmp_qloop_23 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_5*1.0;
+                   const real_t tmp_qloop_25 = tmp_qloop_13*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_26 = tmp_qloop_1*tmp_qloop_23 - tmp_qloop_21*tmp_qloop_4*tmp_qloop_8;
+                   const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_22*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                   const real_t tmp_qloop_28 = tmp_qloop_22*tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q];
+                   const real_t tmp_qloop_30 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q];
+                   const real_t tmp_qloop_31 = tmp_qloop_25*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q];
+                   const real_t tmp_qloop_33 = tmp_qloop_25*tmp_qloop_27;
+                   const real_t tmp_qloop_34 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q];
+                   const real_t tmp_qloop_35 = tmp_qloop_32*0.66666666666666667 + tmp_qloop_34*0.66666666666666667;
+                   const real_t tmp_qloop_36 = tmp_qloop_24*tmp_qloop_27;
+                   const real_t tmp_qloop_37 = -tmp_qloop_26*tmp_qloop_27;
+                   const real_t tmp_qloop_38 = (tmp_qloop_36*1.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q] + tmp_qloop_37*1.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q])*(tmp_qloop_36*2.0*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q] + tmp_qloop_37*2.0*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q]);
+                   const real_t tmp_qloop_39 = tmp_qloop_32*1.0 + tmp_qloop_34*1.0;
+                   const real_t tmp_qloop_40 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q] + tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q];
+                   const real_t tmp_qloop_41 = tmp_qloop_36*0.5;
+                   const real_t tmp_qloop_42 = tmp_qloop_37*0.5;
+                   const real_t tmp_qloop_43 = tmp_qloop_28*0.5*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q] + tmp_qloop_33*0.5*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q];
+                   const real_t tmp_qloop_44 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_43*2.0;
+                   const real_t tmp_qloop_45 = -p_affine_0_0 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_46 = (tmp_qloop_45*tmp_qloop_45);
+                   const real_t tmp_qloop_47 = -p_affine_0_1 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                   const real_t tmp_qloop_48 = (tmp_qloop_47*tmp_qloop_47);
+                   const real_t tmp_qloop_49 = tmp_qloop_46 + tmp_qloop_48;
+                   const real_t tmp_qloop_51 = pow(tmp_qloop_49, -0.50000000000000000)*tmp_qloop_50*1.0;
+                   const real_t tmp_qloop_52 = tmp_qloop_45*tmp_qloop_51;
+                   const real_t tmp_qloop_53 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_45) + tmp_qloop_12*(rayVertex_1 + tmp_qloop_47);
+                   const real_t tmp_qloop_54 = pow(tmp_qloop_49, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_55 = tmp_qloop_54*(radRayVertex + tmp_qloop_50*tmp_qloop_53);
+                   const real_t tmp_qloop_56 = tmp_qloop_47*tmp_qloop_51;
+                   const real_t tmp_qloop_57 = tmp_qloop_54*(radRayVertex + tmp_qloop_50*tmp_qloop_53);
+                   const real_t tmp_qloop_58 = tmp_qloop_45*tmp_qloop_47;
+                   const real_t tmp_qloop_59 = abs_det_jac_affine_BLUE*(mu_dof_0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*_data_phi_0_0_BLUE[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_52 - tmp_qloop_48*tmp_qloop_55)*(tmp_qloop_12*tmp_qloop_56 + tmp_qloop_46*tmp_qloop_57) - (tmp_qloop_0*tmp_qloop_56 + tmp_qloop_55*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_52 - tmp_qloop_57*tmp_qloop_58))*_data_q_w[q];
+                   const real_t tmp_qloop_60 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1];
+                   const real_t tmp_qloop_61 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1];
+                   const real_t tmp_qloop_62 = tmp_qloop_60 + tmp_qloop_61;
+                   const real_t tmp_qloop_63 = tmp_qloop_60*2.0 + tmp_qloop_61*2.0;
+                   const real_t tmp_qloop_64 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_40;
+                   const real_t tmp_qloop_65 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2];
+                   const real_t tmp_qloop_66 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2];
+                   const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66;
+                   const real_t tmp_qloop_68 = tmp_qloop_65*2.0 + tmp_qloop_66*2.0;
+                   const real_t tmp_qloop_69 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_40;
+                   const real_t tmp_qloop_70 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3];
+                   const real_t tmp_qloop_71 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3];
+                   const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71;
+                   const real_t tmp_qloop_73 = tmp_qloop_70*2.0 + tmp_qloop_71*2.0;
+                   const real_t tmp_qloop_74 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_40;
+                   const real_t tmp_qloop_75 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4];
+                   const real_t tmp_qloop_76 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4];
+                   const real_t tmp_qloop_77 = tmp_qloop_75 + tmp_qloop_76;
+                   const real_t tmp_qloop_78 = tmp_qloop_75*2.0 + tmp_qloop_76*2.0;
+                   const real_t tmp_qloop_79 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_40;
+                   const real_t tmp_qloop_80 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5];
+                   const real_t tmp_qloop_81 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5];
+                   const real_t tmp_qloop_82 = tmp_qloop_80 + tmp_qloop_81;
+                   const real_t tmp_qloop_83 = tmp_qloop_80*2.0 + tmp_qloop_81*2.0;
+                   const real_t tmp_qloop_84 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_40;
+                   const real_t tmp_qloop_85 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1];
+                   const real_t tmp_qloop_86 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1];
+                   const real_t tmp_qloop_87 = tmp_qloop_85*0.66666666666666667 + tmp_qloop_86*0.66666666666666667;
+                   const real_t tmp_qloop_88 = tmp_qloop_85*1.0 + tmp_qloop_86*1.0;
+                   const real_t tmp_qloop_89 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_43*2.0;
+                   const real_t tmp_qloop_90 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2];
+                   const real_t tmp_qloop_91 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2];
+                   const real_t tmp_qloop_92 = tmp_qloop_90*0.66666666666666667 + tmp_qloop_91*0.66666666666666667;
+                   const real_t tmp_qloop_93 = tmp_qloop_90*1.0 + tmp_qloop_91*1.0;
+                   const real_t tmp_qloop_94 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_43*2.0;
+                   const real_t tmp_qloop_95 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3];
+                   const real_t tmp_qloop_96 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3];
+                   const real_t tmp_qloop_97 = tmp_qloop_95*0.66666666666666667 + tmp_qloop_96*0.66666666666666667;
+                   const real_t tmp_qloop_98 = tmp_qloop_95*1.0 + tmp_qloop_96*1.0;
+                   const real_t tmp_qloop_99 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_43*2.0;
+                   const real_t tmp_qloop_100 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4];
+                   const real_t tmp_qloop_101 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4];
+                   const real_t tmp_qloop_102 = tmp_qloop_100*0.66666666666666667 + tmp_qloop_101*0.66666666666666667;
+                   const real_t tmp_qloop_103 = tmp_qloop_100*1.0 + tmp_qloop_101*1.0;
+                   const real_t tmp_qloop_104 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_43*2.0;
+                   const real_t tmp_qloop_105 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5];
+                   const real_t tmp_qloop_106 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5];
+                   const real_t q_tmp_0_0 = tmp_qloop_59*(-tmp_qloop_35*(tmp_qloop_29 + tmp_qloop_31) + tmp_qloop_38 + tmp_qloop_39*(tmp_qloop_29*2.0 + tmp_qloop_31*2.0) + tmp_qloop_44*(tmp_qloop_24*tmp_qloop_30 + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_40));
+                   const real_t q_tmp_0_1 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_62 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_63 + tmp_qloop_44*tmp_qloop_64);
+                   const real_t q_tmp_0_2 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_67 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_68 + tmp_qloop_44*tmp_qloop_69);
+                   const real_t q_tmp_0_3 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_72 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_73 + tmp_qloop_44*tmp_qloop_74);
+                   const real_t q_tmp_0_4 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_77 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_78 + tmp_qloop_44*tmp_qloop_79);
+                   const real_t q_tmp_0_5 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_82 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_83 + tmp_qloop_44*tmp_qloop_84);
+                   const real_t q_tmp_1_1 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_62*tmp_qloop_87 + tmp_qloop_63*tmp_qloop_88 + tmp_qloop_64*tmp_qloop_89);
+                   const real_t q_tmp_1_2 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_67*tmp_qloop_87 + tmp_qloop_68*tmp_qloop_88 + tmp_qloop_69*tmp_qloop_89);
+                   const real_t q_tmp_1_3 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_72*tmp_qloop_87 + tmp_qloop_73*tmp_qloop_88 + tmp_qloop_74*tmp_qloop_89);
+                   const real_t q_tmp_1_4 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_77*tmp_qloop_87 + tmp_qloop_78*tmp_qloop_88 + tmp_qloop_79*tmp_qloop_89);
+                   const real_t q_tmp_1_5 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_82*tmp_qloop_87 + tmp_qloop_83*tmp_qloop_88 + tmp_qloop_84*tmp_qloop_89);
+                   const real_t q_tmp_2_2 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_67*tmp_qloop_92 + tmp_qloop_68*tmp_qloop_93 + tmp_qloop_69*tmp_qloop_94);
+                   const real_t q_tmp_2_3 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_72*tmp_qloop_92 + tmp_qloop_73*tmp_qloop_93 + tmp_qloop_74*tmp_qloop_94);
+                   const real_t q_tmp_2_4 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_77*tmp_qloop_92 + tmp_qloop_78*tmp_qloop_93 + tmp_qloop_79*tmp_qloop_94);
+                   const real_t q_tmp_2_5 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_82*tmp_qloop_92 + tmp_qloop_83*tmp_qloop_93 + tmp_qloop_84*tmp_qloop_94);
+                   const real_t q_tmp_3_3 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_72*tmp_qloop_97 + tmp_qloop_73*tmp_qloop_98 + tmp_qloop_74*tmp_qloop_99);
+                   const real_t q_tmp_3_4 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_77*tmp_qloop_97 + tmp_qloop_78*tmp_qloop_98 + tmp_qloop_79*tmp_qloop_99);
+                   const real_t q_tmp_3_5 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_82*tmp_qloop_97 + tmp_qloop_83*tmp_qloop_98 + tmp_qloop_84*tmp_qloop_99);
+                   const real_t q_tmp_4_4 = tmp_qloop_59*(-tmp_qloop_102*tmp_qloop_77 + tmp_qloop_103*tmp_qloop_78 + tmp_qloop_104*tmp_qloop_79 + tmp_qloop_38);
+                   const real_t q_tmp_4_5 = tmp_qloop_59*(-tmp_qloop_102*tmp_qloop_82 + tmp_qloop_103*tmp_qloop_83 + tmp_qloop_104*tmp_qloop_84 + tmp_qloop_38);
+                   const real_t q_tmp_5_5 = tmp_qloop_59*(tmp_qloop_38 + tmp_qloop_82*(tmp_qloop_105 + tmp_qloop_106)*-0.66666666666666667 + tmp_qloop_83*(tmp_qloop_105*1.0 + tmp_qloop_106*1.0) + tmp_qloop_84*(tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5] + tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_43)*2.0);
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                   q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                   q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                   q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                   q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                   q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                   q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                   q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+                const real_t elMatVec_1 = q_acc_0_1*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+                const real_t elMatVec_2 = q_acc_0_2*src_dof_0 + q_acc_1_2*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+                const real_t elMatVec_3 = q_acc_0_3*src_dof_0 + q_acc_1_3*src_dof_1 + q_acc_2_3*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+                const real_t elMatVec_4 = q_acc_0_4*src_dof_0 + q_acc_1_4*src_dof_1 + q_acc_2_4*src_dof_2 + q_acc_3_4*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+                const real_t elMatVec_5 = q_acc_0_5*src_dof_0 + q_acc_1_5*src_dof_1 + q_acc_2_5*src_dof_2 + q_acc_3_5*src_dof_3 + q_acc_4_5*src_dof_4 + q_acc_5_5*src_dof_5;
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..98871d5e9bfa773f047579496d545d6c130c482a
--- /dev/null
+++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
@@ -0,0 +1,687 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseFullStokesAnnulusMap_1_1.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseFullStokesAnnulusMap_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (-tmp_qloop_1*tmp_qloop_11 + tmp_qloop_13*tmp_qloop_14);
+       const real_t tmp_qloop_47 = tmp_qloop_15*1.0 / (tmp_qloop_0*tmp_qloop_11 - tmp_qloop_12*tmp_qloop_14);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_4);
+                   const __m256d tmp_qloop_8 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(tmp_qloop_5,tmp_qloop_9);
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_10)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16));
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_4);
+                   const __m256d tmp_qloop_19 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_10),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_10));
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_4),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13))),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_21 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_21,tmp_qloop_9));
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_8);
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_5),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_4),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_4),tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_24),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_26)));
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_27);
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q]));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q]));
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q]));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_27);
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q]));
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_27);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,tmp_qloop_27),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q]))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q]))));
+                   const __m256d tmp_qloop_38 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q])),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q])));
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q],_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q])));
+                   const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_42);
+                   const __m256d tmp_qloop_44 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_44,tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_add_pd(tmp_qloop_43,tmp_qloop_45);
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_46)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_47,tmp_qloop_47,tmp_qloop_47,tmp_qloop_47));
+                   const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_48);
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_44),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_42),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)));
+                   const __m256d tmp_qloop_51 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_46),_mm256_mul_pd(tmp_qloop_46,tmp_qloop_46)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_50,_mm256_set_pd(tmp_qloop_47,tmp_qloop_47,tmp_qloop_47,tmp_qloop_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_44,tmp_qloop_48);
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_51,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_50,_mm256_set_pd(tmp_qloop_47,tmp_qloop_47,tmp_qloop_47,tmp_qloop_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_44);
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q],_data_phi_0_0_GRAY[6*q])),_mm256_mul_pd(mu_dof_1,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1],_data_phi_0_0_GRAY[6*q + 1]))),_mm256_mul_pd(mu_dof_2,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2],_data_phi_0_0_GRAY[6*q + 2]))),_mm256_mul_pd(mu_dof_3,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3],_data_phi_0_0_GRAY[6*q + 3]))),_mm256_mul_pd(mu_dof_4,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4],_data_phi_0_0_GRAY[6*q + 4]))),_mm256_mul_pd(mu_dof_5,_mm256_set_pd(_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5],_data_phi_0_0_GRAY[6*q + 5]))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), 
_mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_43,tmp_qloop_54)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_49,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_52),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_49,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_54,tmp_qloop_55),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(tmp_qloop_52,tmp_qloop_55))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1]));
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2]));
+                   const __m256d tmp_qloop_65 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_68 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3]));
+                   const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_72 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4]));
+                   const __m256d tmp_qloop_73 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5]));
+                   const __m256d tmp_qloop_74 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5]));
+                   const __m256d tmp_qloop_75 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5]));
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5]));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_56,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_29,tmp_qloop_31),_mm256_add_pd(tmp_qloop_32,tmp_qloop_34)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_30)),tmp_qloop_38),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q]))),tmp_qloop_41)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_37));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_56,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_59,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_60,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_57,tmp_qloop_58),_mm256_add_pd(tmp_qloop_59,tmp_qloop_60)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1]))),tmp_qloop_38),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1]))),tmp_qloop_41)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_37));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_56,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_64,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_61,tmp_qloop_62),_mm256_add_pd(tmp_qloop_63,tmp_qloop_64)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2]))),tmp_qloop_38),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2]))),tmp_qloop_41)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_37));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_56,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_68,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_65,tmp_qloop_66),_mm256_add_pd(tmp_qloop_67,tmp_qloop_68)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3]))),tmp_qloop_38),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3]))),tmp_qloop_41)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_37));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_56,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_72,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_69,tmp_qloop_70),_mm256_add_pd(tmp_qloop_71,tmp_qloop_72)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4]))),tmp_qloop_38),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4]))),tmp_qloop_41)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_37));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_56,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_74,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_75,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_76,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_73,tmp_qloop_74),_mm256_add_pd(tmp_qloop_75,tmp_qloop_76)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5]))),tmp_qloop_38),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5]))),tmp_qloop_41)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_37));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatDiag_0 = q_acc_0_0;
+                const __m256d elMatDiag_1 = q_acc_1_1;
+                const __m256d elMatDiag_2 = q_acc_2_2;
+                const __m256d elMatDiag_3 = q_acc_3_3;
+                const __m256d elMatDiag_4 = q_acc_4_4;
+                const __m256d elMatDiag_5 = q_acc_5_5;
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_2 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_qloop_3 = p_affine_0_0 - p_affine_2_0;
+                const real_t tmp_qloop_6 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_7 = p_affine_0_1 - p_affine_2_1;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_4 = p_affine_0_0 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_5 = (tmp_qloop_4*tmp_qloop_4);
+                   const real_t tmp_qloop_8 = p_affine_0_1 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                   const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                   const real_t tmp_qloop_10 = tmp_qloop_5 + tmp_qloop_9;
+                   const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                   const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                   const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_4) - tmp_qloop_13*(-rayVertex_1 + tmp_qloop_8));
+                   const real_t tmp_qloop_21 = tmp_qloop_19*tmp_qloop_20*1.0;
+                   const real_t tmp_qloop_22 = tmp_qloop_1*tmp_qloop_18 + tmp_qloop_21*tmp_qloop_9;
+                   const real_t tmp_qloop_23 = tmp_qloop_17*tmp_qloop_8;
+                   const real_t tmp_qloop_24 = -tmp_qloop_13*tmp_qloop_23 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_5*1.0;
+                   const real_t tmp_qloop_25 = tmp_qloop_13*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_26 = tmp_qloop_1*tmp_qloop_23 - tmp_qloop_21*tmp_qloop_4*tmp_qloop_8;
+                   const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_22*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                   const real_t tmp_qloop_28 = tmp_qloop_22*tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q];
+                   const real_t tmp_qloop_30 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q];
+                   const real_t tmp_qloop_31 = tmp_qloop_25*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q];
+                   const real_t tmp_qloop_33 = tmp_qloop_25*tmp_qloop_27;
+                   const real_t tmp_qloop_34 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q];
+                   const real_t tmp_qloop_35 = tmp_qloop_24*tmp_qloop_27;
+                   const real_t tmp_qloop_36 = -tmp_qloop_26*tmp_qloop_27;
+                   const real_t tmp_qloop_37 = (tmp_qloop_35*1.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q] + tmp_qloop_36*1.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q])*(tmp_qloop_35*2.0*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q] + tmp_qloop_36*2.0*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q]);
+                   const real_t tmp_qloop_38 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q] + tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q];
+                   const real_t tmp_qloop_39 = tmp_qloop_35*0.5;
+                   const real_t tmp_qloop_40 = tmp_qloop_36*0.5;
+                   const real_t tmp_qloop_41 = tmp_qloop_28*0.5*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q] + tmp_qloop_33*0.5*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q];
+                   const real_t tmp_qloop_42 = -p_affine_0_0 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_43 = (tmp_qloop_42*tmp_qloop_42);
+                   const real_t tmp_qloop_44 = -p_affine_0_1 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                   const real_t tmp_qloop_45 = (tmp_qloop_44*tmp_qloop_44);
+                   const real_t tmp_qloop_46 = tmp_qloop_43 + tmp_qloop_45;
+                   const real_t tmp_qloop_48 = pow(tmp_qloop_46, -0.50000000000000000)*tmp_qloop_47*1.0;
+                   const real_t tmp_qloop_49 = tmp_qloop_42*tmp_qloop_48;
+                   const real_t tmp_qloop_50 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_42) + tmp_qloop_12*(rayVertex_1 + tmp_qloop_44);
+                   const real_t tmp_qloop_51 = pow(tmp_qloop_46, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_52 = tmp_qloop_51*(radRayVertex + tmp_qloop_47*tmp_qloop_50);
+                   const real_t tmp_qloop_53 = tmp_qloop_44*tmp_qloop_48;
+                   const real_t tmp_qloop_54 = tmp_qloop_51*(radRayVertex + tmp_qloop_47*tmp_qloop_50);
+                   const real_t tmp_qloop_55 = tmp_qloop_42*tmp_qloop_44;
+                   const real_t tmp_qloop_56 = abs_det_jac_affine_GRAY*(mu_dof_0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*_data_phi_0_0_GRAY[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_49 - tmp_qloop_45*tmp_qloop_52)*(tmp_qloop_12*tmp_qloop_53 + tmp_qloop_43*tmp_qloop_54) - (tmp_qloop_0*tmp_qloop_53 + tmp_qloop_52*tmp_qloop_55)*(tmp_qloop_12*tmp_qloop_49 - tmp_qloop_54*tmp_qloop_55))*_data_q_w[q];
+                   const real_t tmp_qloop_57 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1];
+                   const real_t tmp_qloop_58 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1];
+                   const real_t tmp_qloop_59 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1];
+                   const real_t tmp_qloop_60 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1];
+                   const real_t tmp_qloop_61 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2];
+                   const real_t tmp_qloop_62 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2];
+                   const real_t tmp_qloop_63 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2];
+                   const real_t tmp_qloop_64 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2];
+                   const real_t tmp_qloop_65 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3];
+                   const real_t tmp_qloop_66 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3];
+                   const real_t tmp_qloop_67 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3];
+                   const real_t tmp_qloop_68 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3];
+                   const real_t tmp_qloop_69 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4];
+                   const real_t tmp_qloop_70 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4];
+                   const real_t tmp_qloop_71 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4];
+                   const real_t tmp_qloop_72 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4];
+                   const real_t tmp_qloop_73 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5];
+                   const real_t tmp_qloop_74 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5];
+                   const real_t tmp_qloop_75 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5];
+                   const real_t tmp_qloop_76 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5];
+                   const real_t q_tmp_0_0 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_29 + tmp_qloop_31)*(tmp_qloop_32 + tmp_qloop_34)*-0.66666666666666667 + (tmp_qloop_29*2.0 + tmp_qloop_31*2.0)*(tmp_qloop_32*1.0 + tmp_qloop_34*1.0) + (tmp_qloop_24*tmp_qloop_30 + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_41)*2.0);
+                   const real_t q_tmp_1_1 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_57 + tmp_qloop_58)*(tmp_qloop_59 + tmp_qloop_60)*-0.66666666666666667 + (tmp_qloop_57*2.0 + tmp_qloop_58*2.0)*(tmp_qloop_59*1.0 + tmp_qloop_60*1.0) + (tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_41)*2.0);
+                   const real_t q_tmp_2_2 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_61 + tmp_qloop_62)*(tmp_qloop_63 + tmp_qloop_64)*-0.66666666666666667 + (tmp_qloop_61*2.0 + tmp_qloop_62*2.0)*(tmp_qloop_63*1.0 + tmp_qloop_64*1.0) + (tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_41)*2.0);
+                   const real_t q_tmp_3_3 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_65 + tmp_qloop_66)*(tmp_qloop_67 + tmp_qloop_68)*-0.66666666666666667 + (tmp_qloop_65*2.0 + tmp_qloop_66*2.0)*(tmp_qloop_67*1.0 + tmp_qloop_68*1.0) + (tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_41)*2.0);
+                   const real_t q_tmp_4_4 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_69 + tmp_qloop_70)*(tmp_qloop_71 + tmp_qloop_72)*-0.66666666666666667 + (tmp_qloop_69*2.0 + tmp_qloop_70*2.0)*(tmp_qloop_71*1.0 + tmp_qloop_72*1.0) + (tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_41)*2.0);
+                   const real_t q_tmp_5_5 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_73 + tmp_qloop_74)*(tmp_qloop_75 + tmp_qloop_76)*-0.66666666666666667 + (tmp_qloop_73*2.0 + tmp_qloop_74*2.0)*(tmp_qloop_75*1.0 + tmp_qloop_76*1.0) + (tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_41)*2.0);
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatDiag_0 = q_acc_0_0;
+                const real_t elMatDiag_1 = q_acc_1_1;
+                const real_t elMatDiag_2 = q_acc_2_2;
+                const real_t elMatDiag_3 = q_acc_3_3;
+                const real_t elMatDiag_4 = q_acc_4_4;
+                const real_t elMatDiag_5 = q_acc_5_5;
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d mu_dof_0 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d mu_dof_1 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_2 = _mm256_loadu_pd(& _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d mu_dof_3 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d mu_dof_4 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]);
+                const __m256d mu_dof_5 = _mm256_loadu_pd(& _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_qloop_7 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const __m256d tmp_qloop_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0);
+                   const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_4);
+                   const __m256d tmp_qloop_8 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1);
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(tmp_qloop_5,tmp_qloop_9);
+                   const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_10)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16));
+                   const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_4);
+                   const __m256d tmp_qloop_19 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_10),_mm256_mul_pd(tmp_qloop_10,tmp_qloop_10));
+                   const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_4),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13))),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                   const __m256d tmp_qloop_21 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_21,tmp_qloop_9));
+                   const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_8);
+                   const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_5),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_20),tmp_qloop_4),tmp_qloop_8),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                   const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_4),tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_27 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_24),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_26)));
+                   const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_27);
+                   const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q]));
+                   const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q]));
+                   const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_30);
+                   const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q]));
+                   const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_27);
+                   const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q]));
+                   const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_27);
+                   const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,tmp_qloop_27),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                   const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q]))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q]))));
+                   const __m256d tmp_qloop_38 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q])),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q])));
+                   const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(0.5,0.5,0.5,0.5));
+                   const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q])),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(0.5,0.5,0.5,0.5)),_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q],_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q])));
+                   const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_42);
+                   const __m256d tmp_qloop_44 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])));
+                   const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_44,tmp_qloop_44);
+                   const __m256d tmp_qloop_46 = _mm256_add_pd(tmp_qloop_43,tmp_qloop_45);
+                   const __m256d tmp_qloop_48 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_46)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_47,tmp_qloop_47,tmp_qloop_47,tmp_qloop_47));
+                   const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_48);
+                   const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_44),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_42),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)));
+                   const __m256d tmp_qloop_51 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_46),_mm256_mul_pd(tmp_qloop_46,tmp_qloop_46)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_50,_mm256_set_pd(tmp_qloop_47,tmp_qloop_47,tmp_qloop_47,tmp_qloop_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_44,tmp_qloop_48);
+                   const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_51,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_50,_mm256_set_pd(tmp_qloop_47,tmp_qloop_47,tmp_qloop_47,tmp_qloop_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_44);
+                   const __m256d tmp_qloop_56 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(mu_dof_0,_mm256_set_pd(_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q],_data_phi_0_0_BLUE[6*q])),_mm256_mul_pd(mu_dof_1,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1],_data_phi_0_0_BLUE[6*q + 1]))),_mm256_mul_pd(mu_dof_2,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2],_data_phi_0_0_BLUE[6*q + 2]))),_mm256_mul_pd(mu_dof_3,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3],_data_phi_0_0_BLUE[6*q + 3]))),_mm256_mul_pd(mu_dof_4,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4],_data_phi_0_0_BLUE[6*q + 4]))),_mm256_mul_pd(mu_dof_5,_mm256_set_pd(_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5],_data_phi_0_0_BLUE[6*q + 5]))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), 
_mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_43,tmp_qloop_54)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_49,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_45,tmp_qloop_52),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_49,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_54,tmp_qloop_55),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(tmp_qloop_52,tmp_qloop_55))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                   const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1]));
+                   const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_63 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2]));
+                   const __m256d tmp_qloop_65 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_68 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3]));
+                   const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_72 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4]));
+                   const __m256d tmp_qloop_73 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5]));
+                   const __m256d tmp_qloop_74 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5]));
+                   const __m256d tmp_qloop_75 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5]));
+                   const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5]));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_56,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_29,tmp_qloop_31),_mm256_add_pd(tmp_qloop_32,tmp_qloop_34)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_30)),tmp_qloop_38),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q]))),tmp_qloop_41)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_37));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_56,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_59,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_60,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_57,tmp_qloop_58),_mm256_add_pd(tmp_qloop_59,tmp_qloop_60)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1]))),tmp_qloop_38),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1]))),tmp_qloop_41)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_37));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_56,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_64,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_61,tmp_qloop_62),_mm256_add_pd(tmp_qloop_63,tmp_qloop_64)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2]))),tmp_qloop_38),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2]))),tmp_qloop_41)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_37));
+                   const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_56,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_68,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_65,tmp_qloop_66),_mm256_add_pd(tmp_qloop_67,tmp_qloop_68)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3]))),tmp_qloop_38),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3]))),tmp_qloop_41)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_37));
+                   const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_56,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_72,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_69,tmp_qloop_70),_mm256_add_pd(tmp_qloop_71,tmp_qloop_72)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4]))),tmp_qloop_38),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4]))),tmp_qloop_41)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_37));
+                   const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_56,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_74,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_75,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_mul_pd(tmp_qloop_76,_mm256_set_pd(1.0,1.0,1.0,1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(tmp_qloop_73,tmp_qloop_74),_mm256_add_pd(tmp_qloop_75,tmp_qloop_76)),_mm256_set_pd(-0.66666666666666667,-0.66666666666666667,-0.66666666666666667,-0.66666666666666667))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5]))),tmp_qloop_38),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5])),_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5],_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5]))),tmp_qloop_41)),_mm256_set_pd(2.0,2.0,2.0,2.0))),tmp_qloop_37));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                   q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3);
+                   q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4);
+                   q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5);
+                }
+                const __m256d elMatDiag_0 = q_acc_0_0;
+                const __m256d elMatDiag_1 = q_acc_1_1;
+                const __m256d elMatDiag_2 = q_acc_2_2;
+                const __m256d elMatDiag_3 = q_acc_3_3;
+                const __m256d elMatDiag_4 = q_acc_4_4;
+                const __m256d elMatDiag_5 = q_acc_5_5;
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                real_t q_acc_3_3 = 0.0;
+                real_t q_acc_4_4 = 0.0;
+                real_t q_acc_5_5 = 0.0;
+                const real_t tmp_qloop_2 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_qloop_3 = p_affine_0_0 - p_affine_2_0;
+                const real_t tmp_qloop_6 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_qloop_7 = p_affine_0_1 - p_affine_2_1;
+                for (int64_t q = 0; q < 4; q += 1)
+                {
+                   const real_t tmp_qloop_4 = p_affine_0_0 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_5 = (tmp_qloop_4*tmp_qloop_4);
+                   const real_t tmp_qloop_8 = p_affine_0_1 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                   const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                   const real_t tmp_qloop_10 = tmp_qloop_5 + tmp_qloop_9;
+                   const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                   const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                   const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                   const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_4) - tmp_qloop_13*(-rayVertex_1 + tmp_qloop_8));
+                   const real_t tmp_qloop_21 = tmp_qloop_19*tmp_qloop_20*1.0;
+                   const real_t tmp_qloop_22 = tmp_qloop_1*tmp_qloop_18 + tmp_qloop_21*tmp_qloop_9;
+                   const real_t tmp_qloop_23 = tmp_qloop_17*tmp_qloop_8;
+                   const real_t tmp_qloop_24 = -tmp_qloop_13*tmp_qloop_23 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_5*1.0;
+                   const real_t tmp_qloop_25 = tmp_qloop_13*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_8*1.0;
+                   const real_t tmp_qloop_26 = tmp_qloop_1*tmp_qloop_23 - tmp_qloop_21*tmp_qloop_4*tmp_qloop_8;
+                   const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_22*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                   const real_t tmp_qloop_28 = tmp_qloop_22*tmp_qloop_27;
+                   const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q];
+                   const real_t tmp_qloop_30 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q];
+                   const real_t tmp_qloop_31 = tmp_qloop_25*tmp_qloop_30;
+                   const real_t tmp_qloop_32 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q];
+                   const real_t tmp_qloop_33 = tmp_qloop_25*tmp_qloop_27;
+                   const real_t tmp_qloop_34 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q];
+                   const real_t tmp_qloop_35 = tmp_qloop_24*tmp_qloop_27;
+                   const real_t tmp_qloop_36 = -tmp_qloop_26*tmp_qloop_27;
+                   const real_t tmp_qloop_37 = (tmp_qloop_35*1.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q] + tmp_qloop_36*1.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q])*(tmp_qloop_35*2.0*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q] + tmp_qloop_36*2.0*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q]);
+                   const real_t tmp_qloop_38 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q] + tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q];
+                   const real_t tmp_qloop_39 = tmp_qloop_35*0.5;
+                   const real_t tmp_qloop_40 = tmp_qloop_36*0.5;
+                   const real_t tmp_qloop_41 = tmp_qloop_28*0.5*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q] + tmp_qloop_33*0.5*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q];
+                   const real_t tmp_qloop_42 = -p_affine_0_0 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                   const real_t tmp_qloop_43 = (tmp_qloop_42*tmp_qloop_42);
+                   const real_t tmp_qloop_44 = -p_affine_0_1 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                   const real_t tmp_qloop_45 = (tmp_qloop_44*tmp_qloop_44);
+                   const real_t tmp_qloop_46 = tmp_qloop_43 + tmp_qloop_45;
+                   const real_t tmp_qloop_48 = pow(tmp_qloop_46, -0.50000000000000000)*tmp_qloop_47*1.0;
+                   const real_t tmp_qloop_49 = tmp_qloop_42*tmp_qloop_48;
+                   const real_t tmp_qloop_50 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_42) + tmp_qloop_12*(rayVertex_1 + tmp_qloop_44);
+                   const real_t tmp_qloop_51 = pow(tmp_qloop_46, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_52 = tmp_qloop_51*(radRayVertex + tmp_qloop_47*tmp_qloop_50);
+                   const real_t tmp_qloop_53 = tmp_qloop_44*tmp_qloop_48;
+                   const real_t tmp_qloop_54 = tmp_qloop_51*(radRayVertex + tmp_qloop_47*tmp_qloop_50);
+                   const real_t tmp_qloop_55 = tmp_qloop_42*tmp_qloop_44;
+                   const real_t tmp_qloop_56 = abs_det_jac_affine_BLUE*(mu_dof_0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*_data_phi_0_0_BLUE[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_49 - tmp_qloop_45*tmp_qloop_52)*(tmp_qloop_12*tmp_qloop_53 + tmp_qloop_43*tmp_qloop_54) - (tmp_qloop_0*tmp_qloop_53 + tmp_qloop_52*tmp_qloop_55)*(tmp_qloop_12*tmp_qloop_49 - tmp_qloop_54*tmp_qloop_55))*_data_q_w[q];
+                   const real_t tmp_qloop_57 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1];
+                   const real_t tmp_qloop_58 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1];
+                   const real_t tmp_qloop_59 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1];
+                   const real_t tmp_qloop_60 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1];
+                   const real_t tmp_qloop_61 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2];
+                   const real_t tmp_qloop_62 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2];
+                   const real_t tmp_qloop_63 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2];
+                   const real_t tmp_qloop_64 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2];
+                   const real_t tmp_qloop_65 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3];
+                   const real_t tmp_qloop_66 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3];
+                   const real_t tmp_qloop_67 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3];
+                   const real_t tmp_qloop_68 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3];
+                   const real_t tmp_qloop_69 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4];
+                   const real_t tmp_qloop_70 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4];
+                   const real_t tmp_qloop_71 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4];
+                   const real_t tmp_qloop_72 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4];
+                   const real_t tmp_qloop_73 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5];
+                   const real_t tmp_qloop_74 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5];
+                   const real_t tmp_qloop_75 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5];
+                   const real_t tmp_qloop_76 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5];
+                   const real_t q_tmp_0_0 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_29 + tmp_qloop_31)*(tmp_qloop_32 + tmp_qloop_34)*-0.66666666666666667 + (tmp_qloop_29*2.0 + tmp_qloop_31*2.0)*(tmp_qloop_32*1.0 + tmp_qloop_34*1.0) + (tmp_qloop_24*tmp_qloop_30 + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_41)*2.0);
+                   const real_t q_tmp_1_1 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_57 + tmp_qloop_58)*(tmp_qloop_59 + tmp_qloop_60)*-0.66666666666666667 + (tmp_qloop_57*2.0 + tmp_qloop_58*2.0)*(tmp_qloop_59*1.0 + tmp_qloop_60*1.0) + (tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_41)*2.0);
+                   const real_t q_tmp_2_2 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_61 + tmp_qloop_62)*(tmp_qloop_63 + tmp_qloop_64)*-0.66666666666666667 + (tmp_qloop_61*2.0 + tmp_qloop_62*2.0)*(tmp_qloop_63*1.0 + tmp_qloop_64*1.0) + (tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_41)*2.0);
+                   const real_t q_tmp_3_3 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_65 + tmp_qloop_66)*(tmp_qloop_67 + tmp_qloop_68)*-0.66666666666666667 + (tmp_qloop_65*2.0 + tmp_qloop_66*2.0)*(tmp_qloop_67*1.0 + tmp_qloop_68*1.0) + (tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_41)*2.0);
+                   const real_t q_tmp_4_4 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_69 + tmp_qloop_70)*(tmp_qloop_71 + tmp_qloop_72)*-0.66666666666666667 + (tmp_qloop_69*2.0 + tmp_qloop_70*2.0)*(tmp_qloop_71*1.0 + tmp_qloop_72*1.0) + (tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_41)*2.0);
+                   const real_t q_tmp_5_5 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_73 + tmp_qloop_74)*(tmp_qloop_75 + tmp_qloop_76)*-0.66666666666666667 + (tmp_qloop_73*2.0 + tmp_qloop_74*2.0)*(tmp_qloop_75*1.0 + tmp_qloop_76*1.0) + (tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_41)*2.0);
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                   q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                   q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                   q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+                }
+                const real_t elMatDiag_0 = q_acc_0_0;
+                const real_t elMatDiag_1 = q_acc_1_1;
+                const real_t elMatDiag_2 = q_acc_2_2;
+                const real_t elMatDiag_3 = q_acc_3_3;
+                const real_t elMatDiag_4 = q_acc_4_4;
+                const real_t elMatDiag_5 = q_acc_5_5;
+                _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7004dd259a72e91d0a0a580c0710ed38b393274d
--- /dev/null
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp
@@ -0,0 +1,589 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseFullStokesAnnulusMap_0_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseFullStokesAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (tmp_qloop_1*tmp_qloop_14 - tmp_qloop_11*tmp_qloop_13);
+       const real_t tmp_qloop_51 = tmp_qloop_15*1.0 / (-tmp_qloop_0*tmp_qloop_14 + tmp_qloop_11*tmp_qloop_12);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_21*tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q];
+                const real_t tmp_qloop_30 = -tmp_qloop_26;
+                const real_t tmp_qloop_31 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q];
+                const real_t tmp_qloop_32 = tmp_qloop_30*tmp_qloop_31;
+                const real_t tmp_qloop_33 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q];
+                const real_t tmp_qloop_34 = tmp_qloop_27*tmp_qloop_30;
+                const real_t tmp_qloop_35 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q];
+                const real_t tmp_qloop_36 = tmp_qloop_33*0.66666666666666667 + tmp_qloop_35*0.66666666666666667;
+                const real_t tmp_qloop_37 = tmp_qloop_33*1.0 + tmp_qloop_35*1.0;
+                const real_t tmp_qloop_38 = tmp_qloop_24*tmp_qloop_27;
+                const real_t tmp_qloop_39 = tmp_qloop_25*tmp_qloop_27;
+                const real_t tmp_qloop_40 = (tmp_qloop_38*1.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q] + tmp_qloop_39*1.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q])*(tmp_qloop_38*2.0*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q] + tmp_qloop_39*2.0*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q]);
+                const real_t tmp_qloop_41 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q] + tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q];
+                const real_t tmp_qloop_42 = tmp_qloop_38*0.5;
+                const real_t tmp_qloop_43 = tmp_qloop_39*0.5;
+                const real_t tmp_qloop_44 = tmp_qloop_28*0.5*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q] + tmp_qloop_34*0.5*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q];
+                const real_t tmp_qloop_45 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_44*2.0;
+                const real_t tmp_qloop_46 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                const real_t tmp_qloop_48 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                const real_t tmp_qloop_50 = tmp_qloop_47 + tmp_qloop_49;
+                const real_t tmp_qloop_52 = pow(tmp_qloop_50, -0.50000000000000000)*tmp_qloop_51*1.0;
+                const real_t tmp_qloop_53 = tmp_qloop_46*tmp_qloop_52;
+                const real_t tmp_qloop_54 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_48) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_46);
+                const real_t tmp_qloop_55 = pow(tmp_qloop_50, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_56 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_57 = tmp_qloop_48*tmp_qloop_52;
+                const real_t tmp_qloop_58 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_59 = tmp_qloop_46*tmp_qloop_48;
+                const real_t tmp_qloop_60 = abs_det_jac_affine_GRAY*(mu_dof_0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*_data_phi_0_0_GRAY[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_53 - tmp_qloop_58*tmp_qloop_59)*(tmp_qloop_12*tmp_qloop_57 + tmp_qloop_56*tmp_qloop_59) - (tmp_qloop_0*tmp_qloop_57 + tmp_qloop_47*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_53 - tmp_qloop_49*tmp_qloop_56))*_data_q_w[q];
+                const real_t tmp_qloop_61 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1];
+                const real_t tmp_qloop_62 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1];
+                const real_t tmp_qloop_63 = tmp_qloop_61 + tmp_qloop_62;
+                const real_t tmp_qloop_64 = tmp_qloop_61*2.0 + tmp_qloop_62*2.0;
+                const real_t tmp_qloop_65 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_41;
+                const real_t tmp_qloop_66 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2];
+                const real_t tmp_qloop_67 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2];
+                const real_t tmp_qloop_68 = tmp_qloop_66 + tmp_qloop_67;
+                const real_t tmp_qloop_69 = tmp_qloop_66*2.0 + tmp_qloop_67*2.0;
+                const real_t tmp_qloop_70 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_41;
+                const real_t tmp_qloop_71 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3];
+                const real_t tmp_qloop_72 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3];
+                const real_t tmp_qloop_73 = tmp_qloop_71 + tmp_qloop_72;
+                const real_t tmp_qloop_74 = tmp_qloop_71*2.0 + tmp_qloop_72*2.0;
+                const real_t tmp_qloop_75 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_41;
+                const real_t tmp_qloop_76 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4];
+                const real_t tmp_qloop_77 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4];
+                const real_t tmp_qloop_78 = tmp_qloop_76 + tmp_qloop_77;
+                const real_t tmp_qloop_79 = tmp_qloop_76*2.0 + tmp_qloop_77*2.0;
+                const real_t tmp_qloop_80 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_41;
+                const real_t tmp_qloop_81 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_82 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_83 = tmp_qloop_81 + tmp_qloop_82;
+                const real_t tmp_qloop_84 = tmp_qloop_81*2.0 + tmp_qloop_82*2.0;
+                const real_t tmp_qloop_85 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_41;
+                const real_t tmp_qloop_86 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1];
+                const real_t tmp_qloop_87 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1];
+                const real_t tmp_qloop_88 = tmp_qloop_86*0.66666666666666667 + tmp_qloop_87*0.66666666666666667;
+                const real_t tmp_qloop_89 = tmp_qloop_86*1.0 + tmp_qloop_87*1.0;
+                const real_t tmp_qloop_90 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_44*2.0;
+                const real_t tmp_qloop_91 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2];
+                const real_t tmp_qloop_92 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2];
+                const real_t tmp_qloop_93 = tmp_qloop_91*0.66666666666666667 + tmp_qloop_92*0.66666666666666667;
+                const real_t tmp_qloop_94 = tmp_qloop_91*1.0 + tmp_qloop_92*1.0;
+                const real_t tmp_qloop_95 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_44*2.0;
+                const real_t tmp_qloop_96 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3];
+                const real_t tmp_qloop_97 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3];
+                const real_t tmp_qloop_98 = tmp_qloop_96*0.66666666666666667 + tmp_qloop_97*0.66666666666666667;
+                const real_t tmp_qloop_99 = tmp_qloop_96*1.0 + tmp_qloop_97*1.0;
+                const real_t tmp_qloop_100 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_44*2.0;
+                const real_t tmp_qloop_101 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4];
+                const real_t tmp_qloop_102 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4];
+                const real_t tmp_qloop_103 = tmp_qloop_101*0.66666666666666667 + tmp_qloop_102*0.66666666666666667;
+                const real_t tmp_qloop_104 = tmp_qloop_101*1.0 + tmp_qloop_102*1.0;
+                const real_t tmp_qloop_105 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_44*2.0;
+                const real_t tmp_qloop_106 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_107 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5];
+                const real_t q_tmp_0_0 = tmp_qloop_60*(-tmp_qloop_36*(tmp_qloop_29 + tmp_qloop_32) + tmp_qloop_37*(tmp_qloop_29*2.0 + tmp_qloop_32*2.0) + tmp_qloop_40 + tmp_qloop_45*(tmp_qloop_24*tmp_qloop_31 + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_41));
+                const real_t q_tmp_0_1 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_63 + tmp_qloop_37*tmp_qloop_64 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_65);
+                const real_t q_tmp_0_2 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_68 + tmp_qloop_37*tmp_qloop_69 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_70);
+                const real_t q_tmp_0_3 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_73 + tmp_qloop_37*tmp_qloop_74 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_75);
+                const real_t q_tmp_0_4 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_78 + tmp_qloop_37*tmp_qloop_79 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_80);
+                const real_t q_tmp_0_5 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_83 + tmp_qloop_37*tmp_qloop_84 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_85);
+                const real_t q_tmp_1_1 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_63*tmp_qloop_88 + tmp_qloop_64*tmp_qloop_89 + tmp_qloop_65*tmp_qloop_90);
+                const real_t q_tmp_1_2 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_68*tmp_qloop_88 + tmp_qloop_69*tmp_qloop_89 + tmp_qloop_70*tmp_qloop_90);
+                const real_t q_tmp_1_3 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_73*tmp_qloop_88 + tmp_qloop_74*tmp_qloop_89 + tmp_qloop_75*tmp_qloop_90);
+                const real_t q_tmp_1_4 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_78*tmp_qloop_88 + tmp_qloop_79*tmp_qloop_89 + tmp_qloop_80*tmp_qloop_90);
+                const real_t q_tmp_1_5 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_83*tmp_qloop_88 + tmp_qloop_84*tmp_qloop_89 + tmp_qloop_85*tmp_qloop_90);
+                const real_t q_tmp_2_2 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_68*tmp_qloop_93 + tmp_qloop_69*tmp_qloop_94 + tmp_qloop_70*tmp_qloop_95);
+                const real_t q_tmp_2_3 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_73*tmp_qloop_93 + tmp_qloop_74*tmp_qloop_94 + tmp_qloop_75*tmp_qloop_95);
+                const real_t q_tmp_2_4 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_78*tmp_qloop_93 + tmp_qloop_79*tmp_qloop_94 + tmp_qloop_80*tmp_qloop_95);
+                const real_t q_tmp_2_5 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_83*tmp_qloop_93 + tmp_qloop_84*tmp_qloop_94 + tmp_qloop_85*tmp_qloop_95);
+                const real_t q_tmp_3_3 = tmp_qloop_60*(tmp_qloop_100*tmp_qloop_75 + tmp_qloop_40 - tmp_qloop_73*tmp_qloop_98 + tmp_qloop_74*tmp_qloop_99);
+                const real_t q_tmp_3_4 = tmp_qloop_60*(tmp_qloop_100*tmp_qloop_80 + tmp_qloop_40 - tmp_qloop_78*tmp_qloop_98 + tmp_qloop_79*tmp_qloop_99);
+                const real_t q_tmp_3_5 = tmp_qloop_60*(tmp_qloop_100*tmp_qloop_85 + tmp_qloop_40 - tmp_qloop_83*tmp_qloop_98 + tmp_qloop_84*tmp_qloop_99);
+                const real_t q_tmp_4_4 = tmp_qloop_60*(-tmp_qloop_103*tmp_qloop_78 + tmp_qloop_104*tmp_qloop_79 + tmp_qloop_105*tmp_qloop_80 + tmp_qloop_40);
+                const real_t q_tmp_4_5 = tmp_qloop_60*(-tmp_qloop_103*tmp_qloop_83 + tmp_qloop_104*tmp_qloop_84 + tmp_qloop_105*tmp_qloop_85 + tmp_qloop_40);
+                const real_t q_tmp_5_5 = tmp_qloop_60*(tmp_qloop_40 + tmp_qloop_83*(tmp_qloop_106 + tmp_qloop_107)*-0.66666666666666667 + tmp_qloop_84*(tmp_qloop_106*1.0 + tmp_qloop_107*1.0) + tmp_qloop_85*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_44)*2.0);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_0_1*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_0_2*src_dof_0 + q_acc_1_2*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_0_3*src_dof_0 + q_acc_1_3*src_dof_1 + q_acc_2_3*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_0_4*src_dof_0 + q_acc_1_4*src_dof_1 + q_acc_2_4*src_dof_2 + q_acc_3_4*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_0_5*src_dof_0 + q_acc_1_5*src_dof_1 + q_acc_2_5*src_dof_2 + q_acc_3_5*src_dof_3 + q_acc_4_5*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_21*tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q];
+                const real_t tmp_qloop_30 = -tmp_qloop_26;
+                const real_t tmp_qloop_31 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q];
+                const real_t tmp_qloop_32 = tmp_qloop_30*tmp_qloop_31;
+                const real_t tmp_qloop_33 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q];
+                const real_t tmp_qloop_34 = tmp_qloop_27*tmp_qloop_30;
+                const real_t tmp_qloop_35 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q];
+                const real_t tmp_qloop_36 = tmp_qloop_33*0.66666666666666667 + tmp_qloop_35*0.66666666666666667;
+                const real_t tmp_qloop_37 = tmp_qloop_33*1.0 + tmp_qloop_35*1.0;
+                const real_t tmp_qloop_38 = tmp_qloop_24*tmp_qloop_27;
+                const real_t tmp_qloop_39 = tmp_qloop_25*tmp_qloop_27;
+                const real_t tmp_qloop_40 = (tmp_qloop_38*1.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q] + tmp_qloop_39*1.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q])*(tmp_qloop_38*2.0*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q] + tmp_qloop_39*2.0*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q]);
+                const real_t tmp_qloop_41 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q] + tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q];
+                const real_t tmp_qloop_42 = tmp_qloop_38*0.5;
+                const real_t tmp_qloop_43 = tmp_qloop_39*0.5;
+                const real_t tmp_qloop_44 = tmp_qloop_28*0.5*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q] + tmp_qloop_34*0.5*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q];
+                const real_t tmp_qloop_45 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_44*2.0;
+                const real_t tmp_qloop_46 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                const real_t tmp_qloop_48 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                const real_t tmp_qloop_50 = tmp_qloop_47 + tmp_qloop_49;
+                const real_t tmp_qloop_52 = pow(tmp_qloop_50, -0.50000000000000000)*tmp_qloop_51*1.0;
+                const real_t tmp_qloop_53 = tmp_qloop_46*tmp_qloop_52;
+                const real_t tmp_qloop_54 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_48) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_46);
+                const real_t tmp_qloop_55 = pow(tmp_qloop_50, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_56 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_57 = tmp_qloop_48*tmp_qloop_52;
+                const real_t tmp_qloop_58 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_59 = tmp_qloop_46*tmp_qloop_48;
+                const real_t tmp_qloop_60 = abs_det_jac_affine_BLUE*(mu_dof_0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*_data_phi_0_0_BLUE[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_53 - tmp_qloop_58*tmp_qloop_59)*(tmp_qloop_12*tmp_qloop_57 + tmp_qloop_56*tmp_qloop_59) - (tmp_qloop_0*tmp_qloop_57 + tmp_qloop_47*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_53 - tmp_qloop_49*tmp_qloop_56))*_data_q_w[q];
+                const real_t tmp_qloop_61 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1];
+                const real_t tmp_qloop_62 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1];
+                const real_t tmp_qloop_63 = tmp_qloop_61 + tmp_qloop_62;
+                const real_t tmp_qloop_64 = tmp_qloop_61*2.0 + tmp_qloop_62*2.0;
+                const real_t tmp_qloop_65 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_41;
+                const real_t tmp_qloop_66 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2];
+                const real_t tmp_qloop_67 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2];
+                const real_t tmp_qloop_68 = tmp_qloop_66 + tmp_qloop_67;
+                const real_t tmp_qloop_69 = tmp_qloop_66*2.0 + tmp_qloop_67*2.0;
+                const real_t tmp_qloop_70 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_41;
+                const real_t tmp_qloop_71 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3];
+                const real_t tmp_qloop_72 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3];
+                const real_t tmp_qloop_73 = tmp_qloop_71 + tmp_qloop_72;
+                const real_t tmp_qloop_74 = tmp_qloop_71*2.0 + tmp_qloop_72*2.0;
+                const real_t tmp_qloop_75 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_41;
+                const real_t tmp_qloop_76 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4];
+                const real_t tmp_qloop_77 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4];
+                const real_t tmp_qloop_78 = tmp_qloop_76 + tmp_qloop_77;
+                const real_t tmp_qloop_79 = tmp_qloop_76*2.0 + tmp_qloop_77*2.0;
+                const real_t tmp_qloop_80 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_41;
+                const real_t tmp_qloop_81 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_82 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_83 = tmp_qloop_81 + tmp_qloop_82;
+                const real_t tmp_qloop_84 = tmp_qloop_81*2.0 + tmp_qloop_82*2.0;
+                const real_t tmp_qloop_85 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_41;
+                const real_t tmp_qloop_86 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1];
+                const real_t tmp_qloop_87 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1];
+                const real_t tmp_qloop_88 = tmp_qloop_86*0.66666666666666667 + tmp_qloop_87*0.66666666666666667;
+                const real_t tmp_qloop_89 = tmp_qloop_86*1.0 + tmp_qloop_87*1.0;
+                const real_t tmp_qloop_90 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_44*2.0;
+                const real_t tmp_qloop_91 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2];
+                const real_t tmp_qloop_92 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2];
+                const real_t tmp_qloop_93 = tmp_qloop_91*0.66666666666666667 + tmp_qloop_92*0.66666666666666667;
+                const real_t tmp_qloop_94 = tmp_qloop_91*1.0 + tmp_qloop_92*1.0;
+                const real_t tmp_qloop_95 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_44*2.0;
+                const real_t tmp_qloop_96 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3];
+                const real_t tmp_qloop_97 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3];
+                const real_t tmp_qloop_98 = tmp_qloop_96*0.66666666666666667 + tmp_qloop_97*0.66666666666666667;
+                const real_t tmp_qloop_99 = tmp_qloop_96*1.0 + tmp_qloop_97*1.0;
+                const real_t tmp_qloop_100 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_44*2.0;
+                const real_t tmp_qloop_101 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4];
+                const real_t tmp_qloop_102 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4];
+                const real_t tmp_qloop_103 = tmp_qloop_101*0.66666666666666667 + tmp_qloop_102*0.66666666666666667;
+                const real_t tmp_qloop_104 = tmp_qloop_101*1.0 + tmp_qloop_102*1.0;
+                const real_t tmp_qloop_105 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_44*2.0;
+                const real_t tmp_qloop_106 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_107 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5];
+                const real_t q_tmp_0_0 = tmp_qloop_60*(-tmp_qloop_36*(tmp_qloop_29 + tmp_qloop_32) + tmp_qloop_37*(tmp_qloop_29*2.0 + tmp_qloop_32*2.0) + tmp_qloop_40 + tmp_qloop_45*(tmp_qloop_24*tmp_qloop_31 + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_41));
+                const real_t q_tmp_0_1 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_63 + tmp_qloop_37*tmp_qloop_64 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_65);
+                const real_t q_tmp_0_2 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_68 + tmp_qloop_37*tmp_qloop_69 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_70);
+                const real_t q_tmp_0_3 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_73 + tmp_qloop_37*tmp_qloop_74 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_75);
+                const real_t q_tmp_0_4 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_78 + tmp_qloop_37*tmp_qloop_79 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_80);
+                const real_t q_tmp_0_5 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_83 + tmp_qloop_37*tmp_qloop_84 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_85);
+                const real_t q_tmp_1_1 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_63*tmp_qloop_88 + tmp_qloop_64*tmp_qloop_89 + tmp_qloop_65*tmp_qloop_90);
+                const real_t q_tmp_1_2 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_68*tmp_qloop_88 + tmp_qloop_69*tmp_qloop_89 + tmp_qloop_70*tmp_qloop_90);
+                const real_t q_tmp_1_3 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_73*tmp_qloop_88 + tmp_qloop_74*tmp_qloop_89 + tmp_qloop_75*tmp_qloop_90);
+                const real_t q_tmp_1_4 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_78*tmp_qloop_88 + tmp_qloop_79*tmp_qloop_89 + tmp_qloop_80*tmp_qloop_90);
+                const real_t q_tmp_1_5 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_83*tmp_qloop_88 + tmp_qloop_84*tmp_qloop_89 + tmp_qloop_85*tmp_qloop_90);
+                const real_t q_tmp_2_2 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_68*tmp_qloop_93 + tmp_qloop_69*tmp_qloop_94 + tmp_qloop_70*tmp_qloop_95);
+                const real_t q_tmp_2_3 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_73*tmp_qloop_93 + tmp_qloop_74*tmp_qloop_94 + tmp_qloop_75*tmp_qloop_95);
+                const real_t q_tmp_2_4 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_78*tmp_qloop_93 + tmp_qloop_79*tmp_qloop_94 + tmp_qloop_80*tmp_qloop_95);
+                const real_t q_tmp_2_5 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_83*tmp_qloop_93 + tmp_qloop_84*tmp_qloop_94 + tmp_qloop_85*tmp_qloop_95);
+                const real_t q_tmp_3_3 = tmp_qloop_60*(tmp_qloop_100*tmp_qloop_75 + tmp_qloop_40 - tmp_qloop_73*tmp_qloop_98 + tmp_qloop_74*tmp_qloop_99);
+                const real_t q_tmp_3_4 = tmp_qloop_60*(tmp_qloop_100*tmp_qloop_80 + tmp_qloop_40 - tmp_qloop_78*tmp_qloop_98 + tmp_qloop_79*tmp_qloop_99);
+                const real_t q_tmp_3_5 = tmp_qloop_60*(tmp_qloop_100*tmp_qloop_85 + tmp_qloop_40 - tmp_qloop_83*tmp_qloop_98 + tmp_qloop_84*tmp_qloop_99);
+                const real_t q_tmp_4_4 = tmp_qloop_60*(-tmp_qloop_103*tmp_qloop_78 + tmp_qloop_104*tmp_qloop_79 + tmp_qloop_105*tmp_qloop_80 + tmp_qloop_40);
+                const real_t q_tmp_4_5 = tmp_qloop_60*(-tmp_qloop_103*tmp_qloop_83 + tmp_qloop_104*tmp_qloop_84 + tmp_qloop_105*tmp_qloop_85 + tmp_qloop_40);
+                const real_t q_tmp_5_5 = tmp_qloop_60*(tmp_qloop_40 + tmp_qloop_83*(tmp_qloop_106 + tmp_qloop_107)*-0.66666666666666667 + tmp_qloop_84*(tmp_qloop_106*1.0 + tmp_qloop_107*1.0) + tmp_qloop_85*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_44)*2.0);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_0_1*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_0_2*src_dof_0 + q_acc_1_2*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_0_3*src_dof_0 + q_acc_1_3*src_dof_1 + q_acc_2_3*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_0_4*src_dof_0 + q_acc_1_4*src_dof_1 + q_acc_2_4*src_dof_2 + q_acc_3_4*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_0_5*src_dof_0 + q_acc_1_5*src_dof_1 + q_acc_2_5*src_dof_2 + q_acc_3_5*src_dof_3 + q_acc_4_5*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..60e5836c222c6c38a978b803f843c9c703918412
--- /dev/null
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp
@@ -0,0 +1,427 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseFullStokesAnnulusMap_0_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseFullStokesAnnulusMap_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (tmp_qloop_1*tmp_qloop_14 - tmp_qloop_11*tmp_qloop_13);
+       const real_t tmp_qloop_48 = tmp_qloop_15*1.0 / (-tmp_qloop_0*tmp_qloop_14 + tmp_qloop_11*tmp_qloop_12);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_21*tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q];
+                const real_t tmp_qloop_30 = -tmp_qloop_26;
+                const real_t tmp_qloop_31 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q];
+                const real_t tmp_qloop_32 = tmp_qloop_30*tmp_qloop_31;
+                const real_t tmp_qloop_33 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q];
+                const real_t tmp_qloop_34 = tmp_qloop_27*tmp_qloop_30;
+                const real_t tmp_qloop_35 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q];
+                const real_t tmp_qloop_36 = tmp_qloop_24*tmp_qloop_27;
+                const real_t tmp_qloop_37 = tmp_qloop_25*tmp_qloop_27;
+                const real_t tmp_qloop_38 = (tmp_qloop_36*1.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q] + tmp_qloop_37*1.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q])*(tmp_qloop_36*2.0*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q] + tmp_qloop_37*2.0*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q]);
+                const real_t tmp_qloop_39 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q] + tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q];
+                const real_t tmp_qloop_40 = tmp_qloop_36*0.5;
+                const real_t tmp_qloop_41 = tmp_qloop_37*0.5;
+                const real_t tmp_qloop_42 = tmp_qloop_28*0.5*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q] + tmp_qloop_34*0.5*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q];
+                const real_t tmp_qloop_43 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_44 = (tmp_qloop_43*tmp_qloop_43);
+                const real_t tmp_qloop_45 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_46 = (tmp_qloop_45*tmp_qloop_45);
+                const real_t tmp_qloop_47 = tmp_qloop_44 + tmp_qloop_46;
+                const real_t tmp_qloop_49 = pow(tmp_qloop_47, -0.50000000000000000)*tmp_qloop_48*1.0;
+                const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49;
+                const real_t tmp_qloop_51 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_45) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_43);
+                const real_t tmp_qloop_52 = pow(tmp_qloop_47, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_53 = tmp_qloop_52*(radRayVertex + tmp_qloop_48*tmp_qloop_51);
+                const real_t tmp_qloop_54 = tmp_qloop_45*tmp_qloop_49;
+                const real_t tmp_qloop_55 = tmp_qloop_52*(radRayVertex + tmp_qloop_48*tmp_qloop_51);
+                const real_t tmp_qloop_56 = tmp_qloop_43*tmp_qloop_45;
+                const real_t tmp_qloop_57 = abs_det_jac_affine_GRAY*(mu_dof_0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*_data_phi_0_0_GRAY[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_50 - tmp_qloop_55*tmp_qloop_56)*(tmp_qloop_12*tmp_qloop_54 + tmp_qloop_53*tmp_qloop_56) - (tmp_qloop_0*tmp_qloop_54 + tmp_qloop_44*tmp_qloop_55)*(tmp_qloop_12*tmp_qloop_50 - tmp_qloop_46*tmp_qloop_53))*_data_q_w[q];
+                const real_t tmp_qloop_58 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1];
+                const real_t tmp_qloop_59 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1];
+                const real_t tmp_qloop_60 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1];
+                const real_t tmp_qloop_61 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1];
+                const real_t tmp_qloop_62 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2];
+                const real_t tmp_qloop_63 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2];
+                const real_t tmp_qloop_64 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2];
+                const real_t tmp_qloop_65 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2];
+                const real_t tmp_qloop_66 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3];
+                const real_t tmp_qloop_67 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3];
+                const real_t tmp_qloop_68 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3];
+                const real_t tmp_qloop_69 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3];
+                const real_t tmp_qloop_70 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4];
+                const real_t tmp_qloop_71 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4];
+                const real_t tmp_qloop_72 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4];
+                const real_t tmp_qloop_73 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4];
+                const real_t tmp_qloop_74 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_75 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_76 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_77 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5];
+                const real_t q_tmp_0_0 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_29 + tmp_qloop_32)*(tmp_qloop_33 + tmp_qloop_35)*-0.66666666666666667 + (tmp_qloop_29*2.0 + tmp_qloop_32*2.0)*(tmp_qloop_33*1.0 + tmp_qloop_35*1.0) + (tmp_qloop_24*tmp_qloop_31 + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_42)*2.0);
+                const real_t q_tmp_1_1 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_58 + tmp_qloop_59)*(tmp_qloop_60 + tmp_qloop_61)*-0.66666666666666667 + (tmp_qloop_58*2.0 + tmp_qloop_59*2.0)*(tmp_qloop_60*1.0 + tmp_qloop_61*1.0) + (tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_42)*2.0);
+                const real_t q_tmp_2_2 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_62 + tmp_qloop_63)*(tmp_qloop_64 + tmp_qloop_65)*-0.66666666666666667 + (tmp_qloop_62*2.0 + tmp_qloop_63*2.0)*(tmp_qloop_64*1.0 + tmp_qloop_65*1.0) + (tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_42)*2.0);
+                const real_t q_tmp_3_3 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_66 + tmp_qloop_67)*(tmp_qloop_68 + tmp_qloop_69)*-0.66666666666666667 + (tmp_qloop_66*2.0 + tmp_qloop_67*2.0)*(tmp_qloop_68*1.0 + tmp_qloop_69*1.0) + (tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_42)*2.0);
+                const real_t q_tmp_4_4 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_70 + tmp_qloop_71)*(tmp_qloop_72 + tmp_qloop_73)*-0.66666666666666667 + (tmp_qloop_70*2.0 + tmp_qloop_71*2.0)*(tmp_qloop_72*1.0 + tmp_qloop_73*1.0) + (tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_42)*2.0);
+                const real_t q_tmp_5_5 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_74 + tmp_qloop_75)*(tmp_qloop_76 + tmp_qloop_77)*-0.66666666666666667 + (tmp_qloop_74*2.0 + tmp_qloop_75*2.0)*(tmp_qloop_76*1.0 + tmp_qloop_77*1.0) + (tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_42)*2.0);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatDiag_0 = q_acc_0_0;
+             const real_t elMatDiag_1 = q_acc_1_1;
+             const real_t elMatDiag_2 = q_acc_2_2;
+             const real_t elMatDiag_3 = q_acc_3_3;
+             const real_t elMatDiag_4 = q_acc_4_4;
+             const real_t elMatDiag_5 = q_acc_5_5;
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_21*tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q];
+                const real_t tmp_qloop_30 = -tmp_qloop_26;
+                const real_t tmp_qloop_31 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q];
+                const real_t tmp_qloop_32 = tmp_qloop_30*tmp_qloop_31;
+                const real_t tmp_qloop_33 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q];
+                const real_t tmp_qloop_34 = tmp_qloop_27*tmp_qloop_30;
+                const real_t tmp_qloop_35 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q];
+                const real_t tmp_qloop_36 = tmp_qloop_24*tmp_qloop_27;
+                const real_t tmp_qloop_37 = tmp_qloop_25*tmp_qloop_27;
+                const real_t tmp_qloop_38 = (tmp_qloop_36*1.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q] + tmp_qloop_37*1.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q])*(tmp_qloop_36*2.0*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q] + tmp_qloop_37*2.0*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q]);
+                const real_t tmp_qloop_39 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q] + tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q];
+                const real_t tmp_qloop_40 = tmp_qloop_36*0.5;
+                const real_t tmp_qloop_41 = tmp_qloop_37*0.5;
+                const real_t tmp_qloop_42 = tmp_qloop_28*0.5*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q] + tmp_qloop_34*0.5*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q];
+                const real_t tmp_qloop_43 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_44 = (tmp_qloop_43*tmp_qloop_43);
+                const real_t tmp_qloop_45 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_46 = (tmp_qloop_45*tmp_qloop_45);
+                const real_t tmp_qloop_47 = tmp_qloop_44 + tmp_qloop_46;
+                const real_t tmp_qloop_49 = pow(tmp_qloop_47, -0.50000000000000000)*tmp_qloop_48*1.0;
+                const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49;
+                const real_t tmp_qloop_51 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_45) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_43);
+                const real_t tmp_qloop_52 = pow(tmp_qloop_47, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_53 = tmp_qloop_52*(radRayVertex + tmp_qloop_48*tmp_qloop_51);
+                const real_t tmp_qloop_54 = tmp_qloop_45*tmp_qloop_49;
+                const real_t tmp_qloop_55 = tmp_qloop_52*(radRayVertex + tmp_qloop_48*tmp_qloop_51);
+                const real_t tmp_qloop_56 = tmp_qloop_43*tmp_qloop_45;
+                const real_t tmp_qloop_57 = abs_det_jac_affine_BLUE*(mu_dof_0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*_data_phi_0_0_BLUE[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_50 - tmp_qloop_55*tmp_qloop_56)*(tmp_qloop_12*tmp_qloop_54 + tmp_qloop_53*tmp_qloop_56) - (tmp_qloop_0*tmp_qloop_54 + tmp_qloop_44*tmp_qloop_55)*(tmp_qloop_12*tmp_qloop_50 - tmp_qloop_46*tmp_qloop_53))*_data_q_w[q];
+                const real_t tmp_qloop_58 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1];
+                const real_t tmp_qloop_59 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1];
+                const real_t tmp_qloop_60 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1];
+                const real_t tmp_qloop_61 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1];
+                const real_t tmp_qloop_62 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2];
+                const real_t tmp_qloop_63 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2];
+                const real_t tmp_qloop_64 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2];
+                const real_t tmp_qloop_65 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2];
+                const real_t tmp_qloop_66 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3];
+                const real_t tmp_qloop_67 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3];
+                const real_t tmp_qloop_68 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3];
+                const real_t tmp_qloop_69 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3];
+                const real_t tmp_qloop_70 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4];
+                const real_t tmp_qloop_71 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4];
+                const real_t tmp_qloop_72 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4];
+                const real_t tmp_qloop_73 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4];
+                const real_t tmp_qloop_74 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_75 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_76 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_77 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5];
+                const real_t q_tmp_0_0 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_29 + tmp_qloop_32)*(tmp_qloop_33 + tmp_qloop_35)*-0.66666666666666667 + (tmp_qloop_29*2.0 + tmp_qloop_32*2.0)*(tmp_qloop_33*1.0 + tmp_qloop_35*1.0) + (tmp_qloop_24*tmp_qloop_31 + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_42)*2.0);
+                const real_t q_tmp_1_1 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_58 + tmp_qloop_59)*(tmp_qloop_60 + tmp_qloop_61)*-0.66666666666666667 + (tmp_qloop_58*2.0 + tmp_qloop_59*2.0)*(tmp_qloop_60*1.0 + tmp_qloop_61*1.0) + (tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_42)*2.0);
+                const real_t q_tmp_2_2 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_62 + tmp_qloop_63)*(tmp_qloop_64 + tmp_qloop_65)*-0.66666666666666667 + (tmp_qloop_62*2.0 + tmp_qloop_63*2.0)*(tmp_qloop_64*1.0 + tmp_qloop_65*1.0) + (tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_42)*2.0);
+                const real_t q_tmp_3_3 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_66 + tmp_qloop_67)*(tmp_qloop_68 + tmp_qloop_69)*-0.66666666666666667 + (tmp_qloop_66*2.0 + tmp_qloop_67*2.0)*(tmp_qloop_68*1.0 + tmp_qloop_69*1.0) + (tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_42)*2.0);
+                const real_t q_tmp_4_4 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_70 + tmp_qloop_71)*(tmp_qloop_72 + tmp_qloop_73)*-0.66666666666666667 + (tmp_qloop_70*2.0 + tmp_qloop_71*2.0)*(tmp_qloop_72*1.0 + tmp_qloop_73*1.0) + (tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_42)*2.0);
+                const real_t q_tmp_5_5 = tmp_qloop_57*(tmp_qloop_38 + (tmp_qloop_74 + tmp_qloop_75)*(tmp_qloop_76 + tmp_qloop_77)*-0.66666666666666667 + (tmp_qloop_74*2.0 + tmp_qloop_75*2.0)*(tmp_qloop_76*1.0 + tmp_qloop_77*1.0) + (tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_39)*(tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5] + tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_42)*2.0);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatDiag_0 = q_acc_0_0;
+             const real_t elMatDiag_1 = q_acc_1_1;
+             const real_t elMatDiag_2 = q_acc_2_2;
+             const real_t elMatDiag_3 = q_acc_3_3;
+             const real_t elMatDiag_4 = q_acc_4_4;
+             const real_t elMatDiag_5 = q_acc_5_5;
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0f393a24993aefe359694ce3c225c8e09ac765e3
--- /dev/null
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_macro_2D.cpp
@@ -0,0 +1,747 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseFullStokesAnnulusMap_0_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseFullStokesAnnulusMap_0_0::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (tmp_qloop_1*tmp_qloop_14 - tmp_qloop_11*tmp_qloop_13);
+       const real_t tmp_qloop_51 = tmp_qloop_15*1.0 / (-tmp_qloop_0*tmp_qloop_14 + tmp_qloop_11*tmp_qloop_12);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_21*tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q];
+                const real_t tmp_qloop_30 = -tmp_qloop_26;
+                const real_t tmp_qloop_31 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q];
+                const real_t tmp_qloop_32 = tmp_qloop_30*tmp_qloop_31;
+                const real_t tmp_qloop_33 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q];
+                const real_t tmp_qloop_34 = tmp_qloop_27*tmp_qloop_30;
+                const real_t tmp_qloop_35 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q];
+                const real_t tmp_qloop_36 = tmp_qloop_33*0.66666666666666667 + tmp_qloop_35*0.66666666666666667;
+                const real_t tmp_qloop_37 = tmp_qloop_33*1.0 + tmp_qloop_35*1.0;
+                const real_t tmp_qloop_38 = tmp_qloop_24*tmp_qloop_27;
+                const real_t tmp_qloop_39 = tmp_qloop_25*tmp_qloop_27;
+                const real_t tmp_qloop_40 = (tmp_qloop_38*1.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q] + tmp_qloop_39*1.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q])*(tmp_qloop_38*2.0*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q] + tmp_qloop_39*2.0*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q]);
+                const real_t tmp_qloop_41 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q] + tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q];
+                const real_t tmp_qloop_42 = tmp_qloop_38*0.5;
+                const real_t tmp_qloop_43 = tmp_qloop_39*0.5;
+                const real_t tmp_qloop_44 = tmp_qloop_28*0.5*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q] + tmp_qloop_34*0.5*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q];
+                const real_t tmp_qloop_45 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_44*2.0;
+                const real_t tmp_qloop_46 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                const real_t tmp_qloop_48 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                const real_t tmp_qloop_50 = tmp_qloop_47 + tmp_qloop_49;
+                const real_t tmp_qloop_52 = pow(tmp_qloop_50, -0.50000000000000000)*tmp_qloop_51*1.0;
+                const real_t tmp_qloop_53 = tmp_qloop_46*tmp_qloop_52;
+                const real_t tmp_qloop_54 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_48) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_46);
+                const real_t tmp_qloop_55 = pow(tmp_qloop_50, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_56 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_57 = tmp_qloop_48*tmp_qloop_52;
+                const real_t tmp_qloop_58 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_59 = tmp_qloop_46*tmp_qloop_48;
+                const real_t tmp_qloop_60 = abs_det_jac_affine_GRAY*(mu_dof_0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*_data_phi_0_0_GRAY[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_53 - tmp_qloop_58*tmp_qloop_59)*(tmp_qloop_12*tmp_qloop_57 + tmp_qloop_56*tmp_qloop_59) - (tmp_qloop_0*tmp_qloop_57 + tmp_qloop_47*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_53 - tmp_qloop_49*tmp_qloop_56))*_data_q_w[q];
+                const real_t tmp_qloop_61 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1];
+                const real_t tmp_qloop_62 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1];
+                const real_t tmp_qloop_63 = tmp_qloop_61 + tmp_qloop_62;
+                const real_t tmp_qloop_64 = tmp_qloop_61*2.0 + tmp_qloop_62*2.0;
+                const real_t tmp_qloop_65 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_41;
+                const real_t tmp_qloop_66 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2];
+                const real_t tmp_qloop_67 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2];
+                const real_t tmp_qloop_68 = tmp_qloop_66 + tmp_qloop_67;
+                const real_t tmp_qloop_69 = tmp_qloop_66*2.0 + tmp_qloop_67*2.0;
+                const real_t tmp_qloop_70 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_41;
+                const real_t tmp_qloop_71 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3];
+                const real_t tmp_qloop_72 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3];
+                const real_t tmp_qloop_73 = tmp_qloop_71 + tmp_qloop_72;
+                const real_t tmp_qloop_74 = tmp_qloop_71*2.0 + tmp_qloop_72*2.0;
+                const real_t tmp_qloop_75 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_41;
+                const real_t tmp_qloop_76 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4];
+                const real_t tmp_qloop_77 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4];
+                const real_t tmp_qloop_78 = tmp_qloop_76 + tmp_qloop_77;
+                const real_t tmp_qloop_79 = tmp_qloop_76*2.0 + tmp_qloop_77*2.0;
+                const real_t tmp_qloop_80 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_41;
+                const real_t tmp_qloop_81 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_82 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_83 = tmp_qloop_81 + tmp_qloop_82;
+                const real_t tmp_qloop_84 = tmp_qloop_81*2.0 + tmp_qloop_82*2.0;
+                const real_t tmp_qloop_85 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_41;
+                const real_t tmp_qloop_86 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1];
+                const real_t tmp_qloop_87 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1];
+                const real_t tmp_qloop_88 = tmp_qloop_86*0.66666666666666667 + tmp_qloop_87*0.66666666666666667;
+                const real_t tmp_qloop_89 = tmp_qloop_86*1.0 + tmp_qloop_87*1.0;
+                const real_t tmp_qloop_90 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_44*2.0;
+                const real_t tmp_qloop_91 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2];
+                const real_t tmp_qloop_92 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2];
+                const real_t tmp_qloop_93 = tmp_qloop_91*0.66666666666666667 + tmp_qloop_92*0.66666666666666667;
+                const real_t tmp_qloop_94 = tmp_qloop_91*1.0 + tmp_qloop_92*1.0;
+                const real_t tmp_qloop_95 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_44*2.0;
+                const real_t tmp_qloop_96 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3];
+                const real_t tmp_qloop_97 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3];
+                const real_t tmp_qloop_98 = tmp_qloop_96*0.66666666666666667 + tmp_qloop_97*0.66666666666666667;
+                const real_t tmp_qloop_99 = tmp_qloop_96*1.0 + tmp_qloop_97*1.0;
+                const real_t tmp_qloop_100 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_44*2.0;
+                const real_t tmp_qloop_101 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4];
+                const real_t tmp_qloop_102 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4];
+                const real_t tmp_qloop_103 = tmp_qloop_101*0.66666666666666667 + tmp_qloop_102*0.66666666666666667;
+                const real_t tmp_qloop_104 = tmp_qloop_101*1.0 + tmp_qloop_102*1.0;
+                const real_t tmp_qloop_105 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_44*2.0;
+                const real_t tmp_qloop_106 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_107 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5];
+                const real_t q_tmp_0_0 = tmp_qloop_60*(-tmp_qloop_36*(tmp_qloop_29 + tmp_qloop_32) + tmp_qloop_37*(tmp_qloop_29*2.0 + tmp_qloop_32*2.0) + tmp_qloop_40 + tmp_qloop_45*(tmp_qloop_24*tmp_qloop_31 + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_41));
+                const real_t q_tmp_0_1 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_63 + tmp_qloop_37*tmp_qloop_64 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_65);
+                const real_t q_tmp_0_2 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_68 + tmp_qloop_37*tmp_qloop_69 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_70);
+                const real_t q_tmp_0_3 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_73 + tmp_qloop_37*tmp_qloop_74 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_75);
+                const real_t q_tmp_0_4 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_78 + tmp_qloop_37*tmp_qloop_79 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_80);
+                const real_t q_tmp_0_5 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_83 + tmp_qloop_37*tmp_qloop_84 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_85);
+                const real_t q_tmp_1_1 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_63*tmp_qloop_88 + tmp_qloop_64*tmp_qloop_89 + tmp_qloop_65*tmp_qloop_90);
+                const real_t q_tmp_1_2 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_68*tmp_qloop_88 + tmp_qloop_69*tmp_qloop_89 + tmp_qloop_70*tmp_qloop_90);
+                const real_t q_tmp_1_3 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_73*tmp_qloop_88 + tmp_qloop_74*tmp_qloop_89 + tmp_qloop_75*tmp_qloop_90);
+                const real_t q_tmp_1_4 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_78*tmp_qloop_88 + tmp_qloop_79*tmp_qloop_89 + tmp_qloop_80*tmp_qloop_90);
+                const real_t q_tmp_1_5 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_83*tmp_qloop_88 + tmp_qloop_84*tmp_qloop_89 + tmp_qloop_85*tmp_qloop_90);
+                const real_t q_tmp_2_2 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_68*tmp_qloop_93 + tmp_qloop_69*tmp_qloop_94 + tmp_qloop_70*tmp_qloop_95);
+                const real_t q_tmp_2_3 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_73*tmp_qloop_93 + tmp_qloop_74*tmp_qloop_94 + tmp_qloop_75*tmp_qloop_95);
+                const real_t q_tmp_2_4 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_78*tmp_qloop_93 + tmp_qloop_79*tmp_qloop_94 + tmp_qloop_80*tmp_qloop_95);
+                const real_t q_tmp_2_5 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_83*tmp_qloop_93 + tmp_qloop_84*tmp_qloop_94 + tmp_qloop_85*tmp_qloop_95);
+                const real_t q_tmp_3_3 = tmp_qloop_60*(tmp_qloop_100*tmp_qloop_75 + tmp_qloop_40 - tmp_qloop_73*tmp_qloop_98 + tmp_qloop_74*tmp_qloop_99);
+                const real_t q_tmp_3_4 = tmp_qloop_60*(tmp_qloop_100*tmp_qloop_80 + tmp_qloop_40 - tmp_qloop_78*tmp_qloop_98 + tmp_qloop_79*tmp_qloop_99);
+                const real_t q_tmp_3_5 = tmp_qloop_60*(tmp_qloop_100*tmp_qloop_85 + tmp_qloop_40 - tmp_qloop_83*tmp_qloop_98 + tmp_qloop_84*tmp_qloop_99);
+                const real_t q_tmp_4_4 = tmp_qloop_60*(-tmp_qloop_103*tmp_qloop_78 + tmp_qloop_104*tmp_qloop_79 + tmp_qloop_105*tmp_qloop_80 + tmp_qloop_40);
+                const real_t q_tmp_4_5 = tmp_qloop_60*(-tmp_qloop_103*tmp_qloop_83 + tmp_qloop_104*tmp_qloop_84 + tmp_qloop_105*tmp_qloop_85 + tmp_qloop_40);
+                const real_t q_tmp_5_5 = tmp_qloop_60*(tmp_qloop_40 + tmp_qloop_83*(tmp_qloop_106 + tmp_qloop_107)*-0.66666666666666667 + tmp_qloop_84*(tmp_qloop_106*1.0 + tmp_qloop_107*1.0) + tmp_qloop_85*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_44)*2.0);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_0_1;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_0_2;
+             const real_t elMat_2_1 = q_acc_1_2;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_0_3;
+             const real_t elMat_3_1 = q_acc_1_3;
+             const real_t elMat_3_2 = q_acc_2_3;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_0_4;
+             const real_t elMat_4_1 = q_acc_1_4;
+             const real_t elMat_4_2 = q_acc_2_4;
+             const real_t elMat_4_3 = q_acc_3_4;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_0_5;
+             const real_t elMat_5_1 = q_acc_1_5;
+             const real_t elMat_5_2 = q_acc_2_5;
+             const real_t elMat_5_3 = q_acc_3_5;
+             const real_t elMat_5_4 = q_acc_4_5;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_21*tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q];
+                const real_t tmp_qloop_30 = -tmp_qloop_26;
+                const real_t tmp_qloop_31 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q];
+                const real_t tmp_qloop_32 = tmp_qloop_30*tmp_qloop_31;
+                const real_t tmp_qloop_33 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q];
+                const real_t tmp_qloop_34 = tmp_qloop_27*tmp_qloop_30;
+                const real_t tmp_qloop_35 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q];
+                const real_t tmp_qloop_36 = tmp_qloop_33*0.66666666666666667 + tmp_qloop_35*0.66666666666666667;
+                const real_t tmp_qloop_37 = tmp_qloop_33*1.0 + tmp_qloop_35*1.0;
+                const real_t tmp_qloop_38 = tmp_qloop_24*tmp_qloop_27;
+                const real_t tmp_qloop_39 = tmp_qloop_25*tmp_qloop_27;
+                const real_t tmp_qloop_40 = (tmp_qloop_38*1.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q] + tmp_qloop_39*1.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q])*(tmp_qloop_38*2.0*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q] + tmp_qloop_39*2.0*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q]);
+                const real_t tmp_qloop_41 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q] + tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q];
+                const real_t tmp_qloop_42 = tmp_qloop_38*0.5;
+                const real_t tmp_qloop_43 = tmp_qloop_39*0.5;
+                const real_t tmp_qloop_44 = tmp_qloop_28*0.5*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q] + tmp_qloop_34*0.5*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q];
+                const real_t tmp_qloop_45 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_44*2.0;
+                const real_t tmp_qloop_46 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_47 = (tmp_qloop_46*tmp_qloop_46);
+                const real_t tmp_qloop_48 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                const real_t tmp_qloop_50 = tmp_qloop_47 + tmp_qloop_49;
+                const real_t tmp_qloop_52 = pow(tmp_qloop_50, -0.50000000000000000)*tmp_qloop_51*1.0;
+                const real_t tmp_qloop_53 = tmp_qloop_46*tmp_qloop_52;
+                const real_t tmp_qloop_54 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_48) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_46);
+                const real_t tmp_qloop_55 = pow(tmp_qloop_50, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_56 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_57 = tmp_qloop_48*tmp_qloop_52;
+                const real_t tmp_qloop_58 = tmp_qloop_55*(radRayVertex + tmp_qloop_51*tmp_qloop_54);
+                const real_t tmp_qloop_59 = tmp_qloop_46*tmp_qloop_48;
+                const real_t tmp_qloop_60 = abs_det_jac_affine_BLUE*(mu_dof_0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*_data_phi_0_0_BLUE[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_53 - tmp_qloop_58*tmp_qloop_59)*(tmp_qloop_12*tmp_qloop_57 + tmp_qloop_56*tmp_qloop_59) - (tmp_qloop_0*tmp_qloop_57 + tmp_qloop_47*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_53 - tmp_qloop_49*tmp_qloop_56))*_data_q_w[q];
+                const real_t tmp_qloop_61 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1];
+                const real_t tmp_qloop_62 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1];
+                const real_t tmp_qloop_63 = tmp_qloop_61 + tmp_qloop_62;
+                const real_t tmp_qloop_64 = tmp_qloop_61*2.0 + tmp_qloop_62*2.0;
+                const real_t tmp_qloop_65 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_41;
+                const real_t tmp_qloop_66 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2];
+                const real_t tmp_qloop_67 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2];
+                const real_t tmp_qloop_68 = tmp_qloop_66 + tmp_qloop_67;
+                const real_t tmp_qloop_69 = tmp_qloop_66*2.0 + tmp_qloop_67*2.0;
+                const real_t tmp_qloop_70 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_41;
+                const real_t tmp_qloop_71 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3];
+                const real_t tmp_qloop_72 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3];
+                const real_t tmp_qloop_73 = tmp_qloop_71 + tmp_qloop_72;
+                const real_t tmp_qloop_74 = tmp_qloop_71*2.0 + tmp_qloop_72*2.0;
+                const real_t tmp_qloop_75 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_41;
+                const real_t tmp_qloop_76 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4];
+                const real_t tmp_qloop_77 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4];
+                const real_t tmp_qloop_78 = tmp_qloop_76 + tmp_qloop_77;
+                const real_t tmp_qloop_79 = tmp_qloop_76*2.0 + tmp_qloop_77*2.0;
+                const real_t tmp_qloop_80 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_41;
+                const real_t tmp_qloop_81 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_82 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_83 = tmp_qloop_81 + tmp_qloop_82;
+                const real_t tmp_qloop_84 = tmp_qloop_81*2.0 + tmp_qloop_82*2.0;
+                const real_t tmp_qloop_85 = tmp_qloop_38*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_41;
+                const real_t tmp_qloop_86 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1];
+                const real_t tmp_qloop_87 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1];
+                const real_t tmp_qloop_88 = tmp_qloop_86*0.66666666666666667 + tmp_qloop_87*0.66666666666666667;
+                const real_t tmp_qloop_89 = tmp_qloop_86*1.0 + tmp_qloop_87*1.0;
+                const real_t tmp_qloop_90 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_44*2.0;
+                const real_t tmp_qloop_91 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2];
+                const real_t tmp_qloop_92 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2];
+                const real_t tmp_qloop_93 = tmp_qloop_91*0.66666666666666667 + tmp_qloop_92*0.66666666666666667;
+                const real_t tmp_qloop_94 = tmp_qloop_91*1.0 + tmp_qloop_92*1.0;
+                const real_t tmp_qloop_95 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_44*2.0;
+                const real_t tmp_qloop_96 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3];
+                const real_t tmp_qloop_97 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3];
+                const real_t tmp_qloop_98 = tmp_qloop_96*0.66666666666666667 + tmp_qloop_97*0.66666666666666667;
+                const real_t tmp_qloop_99 = tmp_qloop_96*1.0 + tmp_qloop_97*1.0;
+                const real_t tmp_qloop_100 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_44*2.0;
+                const real_t tmp_qloop_101 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4];
+                const real_t tmp_qloop_102 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4];
+                const real_t tmp_qloop_103 = tmp_qloop_101*0.66666666666666667 + tmp_qloop_102*0.66666666666666667;
+                const real_t tmp_qloop_104 = tmp_qloop_101*1.0 + tmp_qloop_102*1.0;
+                const real_t tmp_qloop_105 = tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4] + tmp_qloop_43*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_44*2.0;
+                const real_t tmp_qloop_106 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_107 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5];
+                const real_t q_tmp_0_0 = tmp_qloop_60*(-tmp_qloop_36*(tmp_qloop_29 + tmp_qloop_32) + tmp_qloop_37*(tmp_qloop_29*2.0 + tmp_qloop_32*2.0) + tmp_qloop_40 + tmp_qloop_45*(tmp_qloop_24*tmp_qloop_31 + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_41));
+                const real_t q_tmp_0_1 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_63 + tmp_qloop_37*tmp_qloop_64 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_65);
+                const real_t q_tmp_0_2 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_68 + tmp_qloop_37*tmp_qloop_69 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_70);
+                const real_t q_tmp_0_3 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_73 + tmp_qloop_37*tmp_qloop_74 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_75);
+                const real_t q_tmp_0_4 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_78 + tmp_qloop_37*tmp_qloop_79 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_80);
+                const real_t q_tmp_0_5 = tmp_qloop_60*(-tmp_qloop_36*tmp_qloop_83 + tmp_qloop_37*tmp_qloop_84 + tmp_qloop_40 + tmp_qloop_45*tmp_qloop_85);
+                const real_t q_tmp_1_1 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_63*tmp_qloop_88 + tmp_qloop_64*tmp_qloop_89 + tmp_qloop_65*tmp_qloop_90);
+                const real_t q_tmp_1_2 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_68*tmp_qloop_88 + tmp_qloop_69*tmp_qloop_89 + tmp_qloop_70*tmp_qloop_90);
+                const real_t q_tmp_1_3 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_73*tmp_qloop_88 + tmp_qloop_74*tmp_qloop_89 + tmp_qloop_75*tmp_qloop_90);
+                const real_t q_tmp_1_4 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_78*tmp_qloop_88 + tmp_qloop_79*tmp_qloop_89 + tmp_qloop_80*tmp_qloop_90);
+                const real_t q_tmp_1_5 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_83*tmp_qloop_88 + tmp_qloop_84*tmp_qloop_89 + tmp_qloop_85*tmp_qloop_90);
+                const real_t q_tmp_2_2 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_68*tmp_qloop_93 + tmp_qloop_69*tmp_qloop_94 + tmp_qloop_70*tmp_qloop_95);
+                const real_t q_tmp_2_3 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_73*tmp_qloop_93 + tmp_qloop_74*tmp_qloop_94 + tmp_qloop_75*tmp_qloop_95);
+                const real_t q_tmp_2_4 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_78*tmp_qloop_93 + tmp_qloop_79*tmp_qloop_94 + tmp_qloop_80*tmp_qloop_95);
+                const real_t q_tmp_2_5 = tmp_qloop_60*(tmp_qloop_40 - tmp_qloop_83*tmp_qloop_93 + tmp_qloop_84*tmp_qloop_94 + tmp_qloop_85*tmp_qloop_95);
+                const real_t q_tmp_3_3 = tmp_qloop_60*(tmp_qloop_100*tmp_qloop_75 + tmp_qloop_40 - tmp_qloop_73*tmp_qloop_98 + tmp_qloop_74*tmp_qloop_99);
+                const real_t q_tmp_3_4 = tmp_qloop_60*(tmp_qloop_100*tmp_qloop_80 + tmp_qloop_40 - tmp_qloop_78*tmp_qloop_98 + tmp_qloop_79*tmp_qloop_99);
+                const real_t q_tmp_3_5 = tmp_qloop_60*(tmp_qloop_100*tmp_qloop_85 + tmp_qloop_40 - tmp_qloop_83*tmp_qloop_98 + tmp_qloop_84*tmp_qloop_99);
+                const real_t q_tmp_4_4 = tmp_qloop_60*(-tmp_qloop_103*tmp_qloop_78 + tmp_qloop_104*tmp_qloop_79 + tmp_qloop_105*tmp_qloop_80 + tmp_qloop_40);
+                const real_t q_tmp_4_5 = tmp_qloop_60*(-tmp_qloop_103*tmp_qloop_83 + tmp_qloop_104*tmp_qloop_84 + tmp_qloop_105*tmp_qloop_85 + tmp_qloop_40);
+                const real_t q_tmp_5_5 = tmp_qloop_60*(tmp_qloop_40 + tmp_qloop_83*(tmp_qloop_106 + tmp_qloop_107)*-0.66666666666666667 + tmp_qloop_84*(tmp_qloop_106*1.0 + tmp_qloop_107*1.0) + tmp_qloop_85*(tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5] + tmp_qloop_43*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_44)*2.0);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_0_1;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_0_2;
+             const real_t elMat_2_1 = q_acc_1_2;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_0_3;
+             const real_t elMat_3_1 = q_acc_1_3;
+             const real_t elMat_3_2 = q_acc_2_3;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_0_4;
+             const real_t elMat_4_1 = q_acc_1_4;
+             const real_t elMat_4_2 = q_acc_2_4;
+             const real_t elMat_4_3 = q_acc_3_4;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_0_5;
+             const real_t elMat_5_1 = q_acc_1_5;
+             const real_t elMat_5_2 = q_acc_2_5;
+             const real_t elMat_5_3 = q_acc_3_5;
+             const real_t elMat_5_4 = q_acc_4_5;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0b46237496cea2841b26aec36df34d7825dea6ba
--- /dev/null
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp
@@ -0,0 +1,687 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseFullStokesAnnulusMap_0_1.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseFullStokesAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (-tmp_qloop_1*tmp_qloop_11 + tmp_qloop_13*tmp_qloop_14);
+       const real_t tmp_qloop_53 = tmp_qloop_15*1.0 / (tmp_qloop_0*tmp_qloop_11 - tmp_qloop_12*tmp_qloop_14);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_3 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_qloop_6 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_7 = p_affine_0_1 - p_affine_2_1;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_0 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_5 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_8 = p_affine_0_1 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                const real_t tmp_qloop_10 = tmp_qloop_5 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_4) - tmp_qloop_13*(-rayVertex_1 + tmp_qloop_8));
+                const real_t tmp_qloop_21 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_1*tmp_qloop_18 + tmp_qloop_21*tmp_qloop_9;
+                const real_t tmp_qloop_23 = tmp_qloop_17*tmp_qloop_8;
+                const real_t tmp_qloop_24 = -tmp_qloop_13*tmp_qloop_23 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_5*1.0;
+                const real_t tmp_qloop_25 = tmp_qloop_13*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_1*tmp_qloop_23 - tmp_qloop_21*tmp_qloop_4*tmp_qloop_8;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_22*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_22*tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q];
+                const real_t tmp_qloop_30 = tmp_qloop_25*tmp_qloop_27;
+                const real_t tmp_qloop_31 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q];
+                const real_t tmp_qloop_32 = tmp_qloop_29 + tmp_qloop_31;
+                const real_t tmp_qloop_33 = tmp_qloop_24*tmp_qloop_27;
+                const real_t tmp_qloop_34 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q];
+                const real_t tmp_qloop_35 = -tmp_qloop_26*tmp_qloop_27;
+                const real_t tmp_qloop_36 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q];
+                const real_t tmp_qloop_37 = tmp_qloop_34*0.66666666666666667 + tmp_qloop_36*0.66666666666666667;
+                const real_t tmp_qloop_38 = tmp_qloop_33*2.0*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q] + tmp_qloop_35*2.0*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q];
+                const real_t tmp_qloop_39 = tmp_qloop_38*(tmp_qloop_34*1.0 + tmp_qloop_36*1.0);
+                const real_t tmp_qloop_40 = tmp_qloop_28*1.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q] + tmp_qloop_30*1.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q];
+                const real_t tmp_qloop_41 = tmp_qloop_40*(tmp_qloop_29*2.0 + tmp_qloop_31*2.0);
+                const real_t tmp_qloop_42 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q] + tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q];
+                const real_t tmp_qloop_43 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_42;
+                const real_t tmp_qloop_44 = tmp_qloop_28*0.5;
+                const real_t tmp_qloop_45 = tmp_qloop_30*0.5;
+                const real_t tmp_qloop_46 = tmp_qloop_33*0.5*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q] + tmp_qloop_35*0.5*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q];
+                const real_t tmp_qloop_47 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_46*2.0;
+                const real_t tmp_qloop_48 = -p_affine_0_0 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                const real_t tmp_qloop_50 = -p_affine_0_1 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_51 = (tmp_qloop_50*tmp_qloop_50);
+                const real_t tmp_qloop_52 = tmp_qloop_49 + tmp_qloop_51;
+                const real_t tmp_qloop_54 = pow(tmp_qloop_52, -0.50000000000000000)*tmp_qloop_53*1.0;
+                const real_t tmp_qloop_55 = tmp_qloop_48*tmp_qloop_54;
+                const real_t tmp_qloop_56 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_48) + tmp_qloop_12*(rayVertex_1 + tmp_qloop_50);
+                const real_t tmp_qloop_57 = pow(tmp_qloop_52, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_58 = tmp_qloop_57*(radRayVertex + tmp_qloop_53*tmp_qloop_56);
+                const real_t tmp_qloop_59 = tmp_qloop_50*tmp_qloop_54;
+                const real_t tmp_qloop_60 = tmp_qloop_57*(radRayVertex + tmp_qloop_53*tmp_qloop_56);
+                const real_t tmp_qloop_61 = tmp_qloop_48*tmp_qloop_50;
+                const real_t tmp_qloop_62 = abs_det_jac_affine_GRAY*(mu_dof_0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*_data_phi_0_0_GRAY[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_55 - tmp_qloop_51*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_59 + tmp_qloop_49*tmp_qloop_60) - (tmp_qloop_0*tmp_qloop_59 + tmp_qloop_58*tmp_qloop_61)*(tmp_qloop_12*tmp_qloop_55 - tmp_qloop_60*tmp_qloop_61))*_data_q_w[q];
+                const real_t tmp_qloop_63 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1];
+                const real_t tmp_qloop_64 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1];
+                const real_t tmp_qloop_65 = tmp_qloop_63 + tmp_qloop_64;
+                const real_t tmp_qloop_66 = tmp_qloop_40*(tmp_qloop_63*2.0 + tmp_qloop_64*2.0);
+                const real_t tmp_qloop_67 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_42;
+                const real_t tmp_qloop_68 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2];
+                const real_t tmp_qloop_69 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2];
+                const real_t tmp_qloop_70 = tmp_qloop_68 + tmp_qloop_69;
+                const real_t tmp_qloop_71 = tmp_qloop_40*(tmp_qloop_68*2.0 + tmp_qloop_69*2.0);
+                const real_t tmp_qloop_72 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_42;
+                const real_t tmp_qloop_73 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3];
+                const real_t tmp_qloop_74 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3];
+                const real_t tmp_qloop_75 = tmp_qloop_73 + tmp_qloop_74;
+                const real_t tmp_qloop_76 = tmp_qloop_40*(tmp_qloop_73*2.0 + tmp_qloop_74*2.0);
+                const real_t tmp_qloop_77 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_42;
+                const real_t tmp_qloop_78 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4];
+                const real_t tmp_qloop_79 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4];
+                const real_t tmp_qloop_80 = tmp_qloop_78 + tmp_qloop_79;
+                const real_t tmp_qloop_81 = tmp_qloop_40*(tmp_qloop_78*2.0 + tmp_qloop_79*2.0);
+                const real_t tmp_qloop_82 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_42;
+                const real_t tmp_qloop_83 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5];
+                const real_t tmp_qloop_84 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5];
+                const real_t tmp_qloop_85 = tmp_qloop_83 + tmp_qloop_84;
+                const real_t tmp_qloop_86 = tmp_qloop_40*(tmp_qloop_83*2.0 + tmp_qloop_84*2.0);
+                const real_t tmp_qloop_87 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_42;
+                const real_t tmp_qloop_88 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1];
+                const real_t tmp_qloop_89 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1];
+                const real_t tmp_qloop_90 = tmp_qloop_88*0.66666666666666667 + tmp_qloop_89*0.66666666666666667;
+                const real_t tmp_qloop_91 = tmp_qloop_38*(tmp_qloop_88*1.0 + tmp_qloop_89*1.0);
+                const real_t tmp_qloop_92 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_46*2.0;
+                const real_t tmp_qloop_93 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2];
+                const real_t tmp_qloop_94 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2];
+                const real_t tmp_qloop_95 = tmp_qloop_93*0.66666666666666667 + tmp_qloop_94*0.66666666666666667;
+                const real_t tmp_qloop_96 = tmp_qloop_38*(tmp_qloop_93*1.0 + tmp_qloop_94*1.0);
+                const real_t tmp_qloop_97 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_46*2.0;
+                const real_t tmp_qloop_98 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3];
+                const real_t tmp_qloop_99 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3];
+                const real_t tmp_qloop_100 = tmp_qloop_98*0.66666666666666667 + tmp_qloop_99*0.66666666666666667;
+                const real_t tmp_qloop_101 = tmp_qloop_38*(tmp_qloop_98*1.0 + tmp_qloop_99*1.0);
+                const real_t tmp_qloop_102 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_46*2.0;
+                const real_t tmp_qloop_103 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4];
+                const real_t tmp_qloop_104 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4];
+                const real_t tmp_qloop_105 = tmp_qloop_103*0.66666666666666667 + tmp_qloop_104*0.66666666666666667;
+                const real_t tmp_qloop_106 = tmp_qloop_38*(tmp_qloop_103*1.0 + tmp_qloop_104*1.0);
+                const real_t tmp_qloop_107 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_46*2.0;
+                const real_t tmp_qloop_108 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_109 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_110 = tmp_qloop_108*0.66666666666666667 + tmp_qloop_109*0.66666666666666667;
+                const real_t tmp_qloop_111 = tmp_qloop_38*(tmp_qloop_108*1.0 + tmp_qloop_109*1.0);
+                const real_t tmp_qloop_112 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_46*2.0;
+                const real_t q_tmp_0_0 = tmp_qloop_62*(-tmp_qloop_32*tmp_qloop_37 + tmp_qloop_39 + tmp_qloop_41 + tmp_qloop_43*tmp_qloop_47);
+                const real_t q_tmp_0_1 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_65 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_0_2 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_70 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_72 + tmp_qloop_71);
+                const real_t q_tmp_0_3 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_75 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_0_4 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_80 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_82 + tmp_qloop_81);
+                const real_t q_tmp_0_5 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_85 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_87 + tmp_qloop_86);
+                const real_t q_tmp_1_0 = tmp_qloop_62*(-tmp_qloop_32*tmp_qloop_90 + tmp_qloop_41 + tmp_qloop_43*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_1_1 = tmp_qloop_62*(-tmp_qloop_65*tmp_qloop_90 + tmp_qloop_66 + tmp_qloop_67*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_1_2 = tmp_qloop_62*(-tmp_qloop_70*tmp_qloop_90 + tmp_qloop_71 + tmp_qloop_72*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_1_3 = tmp_qloop_62*(-tmp_qloop_75*tmp_qloop_90 + tmp_qloop_76 + tmp_qloop_77*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_1_4 = tmp_qloop_62*(-tmp_qloop_80*tmp_qloop_90 + tmp_qloop_81 + tmp_qloop_82*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_1_5 = tmp_qloop_62*(-tmp_qloop_85*tmp_qloop_90 + tmp_qloop_86 + tmp_qloop_87*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_2_0 = tmp_qloop_62*(-tmp_qloop_32*tmp_qloop_95 + tmp_qloop_41 + tmp_qloop_43*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_2_1 = tmp_qloop_62*(-tmp_qloop_65*tmp_qloop_95 + tmp_qloop_66 + tmp_qloop_67*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_2_2 = tmp_qloop_62*(-tmp_qloop_70*tmp_qloop_95 + tmp_qloop_71 + tmp_qloop_72*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_2_3 = tmp_qloop_62*(-tmp_qloop_75*tmp_qloop_95 + tmp_qloop_76 + tmp_qloop_77*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_2_4 = tmp_qloop_62*(-tmp_qloop_80*tmp_qloop_95 + tmp_qloop_81 + tmp_qloop_82*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_2_5 = tmp_qloop_62*(-tmp_qloop_85*tmp_qloop_95 + tmp_qloop_86 + tmp_qloop_87*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_3_0 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_32 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_43 + tmp_qloop_41);
+                const real_t q_tmp_3_1 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_65 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_3_2 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_70 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_72 + tmp_qloop_71);
+                const real_t q_tmp_3_3 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_75 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_3_4 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_80 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_82 + tmp_qloop_81);
+                const real_t q_tmp_3_5 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_85 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_87 + tmp_qloop_86);
+                const real_t q_tmp_4_0 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_32 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_43 + tmp_qloop_41);
+                const real_t q_tmp_4_1 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_65 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_4_2 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_70 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_72 + tmp_qloop_71);
+                const real_t q_tmp_4_3 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_75 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_4_4 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_80 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_82 + tmp_qloop_81);
+                const real_t q_tmp_4_5 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_85 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_87 + tmp_qloop_86);
+                const real_t q_tmp_5_0 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_32 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_43 + tmp_qloop_41);
+                const real_t q_tmp_5_1 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_65 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_5_2 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_70 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_72 + tmp_qloop_71);
+                const real_t q_tmp_5_3 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_75 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_5_4 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_80 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_82 + tmp_qloop_81);
+                const real_t q_tmp_5_5 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_85 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_87 + tmp_qloop_86);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_3 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_qloop_6 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_7 = p_affine_0_1 - p_affine_2_1;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_0 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_5 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_8 = p_affine_0_1 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                const real_t tmp_qloop_10 = tmp_qloop_5 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_4) - tmp_qloop_13*(-rayVertex_1 + tmp_qloop_8));
+                const real_t tmp_qloop_21 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_1*tmp_qloop_18 + tmp_qloop_21*tmp_qloop_9;
+                const real_t tmp_qloop_23 = tmp_qloop_17*tmp_qloop_8;
+                const real_t tmp_qloop_24 = -tmp_qloop_13*tmp_qloop_23 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_5*1.0;
+                const real_t tmp_qloop_25 = tmp_qloop_13*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_1*tmp_qloop_23 - tmp_qloop_21*tmp_qloop_4*tmp_qloop_8;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_22*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_22*tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q];
+                const real_t tmp_qloop_30 = tmp_qloop_25*tmp_qloop_27;
+                const real_t tmp_qloop_31 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q];
+                const real_t tmp_qloop_32 = tmp_qloop_29 + tmp_qloop_31;
+                const real_t tmp_qloop_33 = tmp_qloop_24*tmp_qloop_27;
+                const real_t tmp_qloop_34 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q];
+                const real_t tmp_qloop_35 = -tmp_qloop_26*tmp_qloop_27;
+                const real_t tmp_qloop_36 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q];
+                const real_t tmp_qloop_37 = tmp_qloop_34*0.66666666666666667 + tmp_qloop_36*0.66666666666666667;
+                const real_t tmp_qloop_38 = tmp_qloop_33*2.0*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q] + tmp_qloop_35*2.0*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q];
+                const real_t tmp_qloop_39 = tmp_qloop_38*(tmp_qloop_34*1.0 + tmp_qloop_36*1.0);
+                const real_t tmp_qloop_40 = tmp_qloop_28*1.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q] + tmp_qloop_30*1.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q];
+                const real_t tmp_qloop_41 = tmp_qloop_40*(tmp_qloop_29*2.0 + tmp_qloop_31*2.0);
+                const real_t tmp_qloop_42 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q] + tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q];
+                const real_t tmp_qloop_43 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_42;
+                const real_t tmp_qloop_44 = tmp_qloop_28*0.5;
+                const real_t tmp_qloop_45 = tmp_qloop_30*0.5;
+                const real_t tmp_qloop_46 = tmp_qloop_33*0.5*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q] + tmp_qloop_35*0.5*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q];
+                const real_t tmp_qloop_47 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_46*2.0;
+                const real_t tmp_qloop_48 = -p_affine_0_0 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                const real_t tmp_qloop_50 = -p_affine_0_1 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_51 = (tmp_qloop_50*tmp_qloop_50);
+                const real_t tmp_qloop_52 = tmp_qloop_49 + tmp_qloop_51;
+                const real_t tmp_qloop_54 = pow(tmp_qloop_52, -0.50000000000000000)*tmp_qloop_53*1.0;
+                const real_t tmp_qloop_55 = tmp_qloop_48*tmp_qloop_54;
+                const real_t tmp_qloop_56 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_48) + tmp_qloop_12*(rayVertex_1 + tmp_qloop_50);
+                const real_t tmp_qloop_57 = pow(tmp_qloop_52, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_58 = tmp_qloop_57*(radRayVertex + tmp_qloop_53*tmp_qloop_56);
+                const real_t tmp_qloop_59 = tmp_qloop_50*tmp_qloop_54;
+                const real_t tmp_qloop_60 = tmp_qloop_57*(radRayVertex + tmp_qloop_53*tmp_qloop_56);
+                const real_t tmp_qloop_61 = tmp_qloop_48*tmp_qloop_50;
+                const real_t tmp_qloop_62 = abs_det_jac_affine_BLUE*(mu_dof_0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*_data_phi_0_0_BLUE[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_55 - tmp_qloop_51*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_59 + tmp_qloop_49*tmp_qloop_60) - (tmp_qloop_0*tmp_qloop_59 + tmp_qloop_58*tmp_qloop_61)*(tmp_qloop_12*tmp_qloop_55 - tmp_qloop_60*tmp_qloop_61))*_data_q_w[q];
+                const real_t tmp_qloop_63 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1];
+                const real_t tmp_qloop_64 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1];
+                const real_t tmp_qloop_65 = tmp_qloop_63 + tmp_qloop_64;
+                const real_t tmp_qloop_66 = tmp_qloop_40*(tmp_qloop_63*2.0 + tmp_qloop_64*2.0);
+                const real_t tmp_qloop_67 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_42;
+                const real_t tmp_qloop_68 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2];
+                const real_t tmp_qloop_69 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2];
+                const real_t tmp_qloop_70 = tmp_qloop_68 + tmp_qloop_69;
+                const real_t tmp_qloop_71 = tmp_qloop_40*(tmp_qloop_68*2.0 + tmp_qloop_69*2.0);
+                const real_t tmp_qloop_72 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_42;
+                const real_t tmp_qloop_73 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3];
+                const real_t tmp_qloop_74 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3];
+                const real_t tmp_qloop_75 = tmp_qloop_73 + tmp_qloop_74;
+                const real_t tmp_qloop_76 = tmp_qloop_40*(tmp_qloop_73*2.0 + tmp_qloop_74*2.0);
+                const real_t tmp_qloop_77 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_42;
+                const real_t tmp_qloop_78 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4];
+                const real_t tmp_qloop_79 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4];
+                const real_t tmp_qloop_80 = tmp_qloop_78 + tmp_qloop_79;
+                const real_t tmp_qloop_81 = tmp_qloop_40*(tmp_qloop_78*2.0 + tmp_qloop_79*2.0);
+                const real_t tmp_qloop_82 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_42;
+                const real_t tmp_qloop_83 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5];
+                const real_t tmp_qloop_84 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5];
+                const real_t tmp_qloop_85 = tmp_qloop_83 + tmp_qloop_84;
+                const real_t tmp_qloop_86 = tmp_qloop_40*(tmp_qloop_83*2.0 + tmp_qloop_84*2.0);
+                const real_t tmp_qloop_87 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_42;
+                const real_t tmp_qloop_88 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1];
+                const real_t tmp_qloop_89 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1];
+                const real_t tmp_qloop_90 = tmp_qloop_88*0.66666666666666667 + tmp_qloop_89*0.66666666666666667;
+                const real_t tmp_qloop_91 = tmp_qloop_38*(tmp_qloop_88*1.0 + tmp_qloop_89*1.0);
+                const real_t tmp_qloop_92 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_46*2.0;
+                const real_t tmp_qloop_93 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2];
+                const real_t tmp_qloop_94 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2];
+                const real_t tmp_qloop_95 = tmp_qloop_93*0.66666666666666667 + tmp_qloop_94*0.66666666666666667;
+                const real_t tmp_qloop_96 = tmp_qloop_38*(tmp_qloop_93*1.0 + tmp_qloop_94*1.0);
+                const real_t tmp_qloop_97 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_46*2.0;
+                const real_t tmp_qloop_98 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3];
+                const real_t tmp_qloop_99 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3];
+                const real_t tmp_qloop_100 = tmp_qloop_98*0.66666666666666667 + tmp_qloop_99*0.66666666666666667;
+                const real_t tmp_qloop_101 = tmp_qloop_38*(tmp_qloop_98*1.0 + tmp_qloop_99*1.0);
+                const real_t tmp_qloop_102 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_46*2.0;
+                const real_t tmp_qloop_103 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4];
+                const real_t tmp_qloop_104 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4];
+                const real_t tmp_qloop_105 = tmp_qloop_103*0.66666666666666667 + tmp_qloop_104*0.66666666666666667;
+                const real_t tmp_qloop_106 = tmp_qloop_38*(tmp_qloop_103*1.0 + tmp_qloop_104*1.0);
+                const real_t tmp_qloop_107 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_46*2.0;
+                const real_t tmp_qloop_108 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_109 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_110 = tmp_qloop_108*0.66666666666666667 + tmp_qloop_109*0.66666666666666667;
+                const real_t tmp_qloop_111 = tmp_qloop_38*(tmp_qloop_108*1.0 + tmp_qloop_109*1.0);
+                const real_t tmp_qloop_112 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_46*2.0;
+                const real_t q_tmp_0_0 = tmp_qloop_62*(-tmp_qloop_32*tmp_qloop_37 + tmp_qloop_39 + tmp_qloop_41 + tmp_qloop_43*tmp_qloop_47);
+                const real_t q_tmp_0_1 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_65 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_0_2 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_70 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_72 + tmp_qloop_71);
+                const real_t q_tmp_0_3 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_75 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_0_4 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_80 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_82 + tmp_qloop_81);
+                const real_t q_tmp_0_5 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_85 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_87 + tmp_qloop_86);
+                const real_t q_tmp_1_0 = tmp_qloop_62*(-tmp_qloop_32*tmp_qloop_90 + tmp_qloop_41 + tmp_qloop_43*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_1_1 = tmp_qloop_62*(-tmp_qloop_65*tmp_qloop_90 + tmp_qloop_66 + tmp_qloop_67*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_1_2 = tmp_qloop_62*(-tmp_qloop_70*tmp_qloop_90 + tmp_qloop_71 + tmp_qloop_72*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_1_3 = tmp_qloop_62*(-tmp_qloop_75*tmp_qloop_90 + tmp_qloop_76 + tmp_qloop_77*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_1_4 = tmp_qloop_62*(-tmp_qloop_80*tmp_qloop_90 + tmp_qloop_81 + tmp_qloop_82*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_1_5 = tmp_qloop_62*(-tmp_qloop_85*tmp_qloop_90 + tmp_qloop_86 + tmp_qloop_87*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_2_0 = tmp_qloop_62*(-tmp_qloop_32*tmp_qloop_95 + tmp_qloop_41 + tmp_qloop_43*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_2_1 = tmp_qloop_62*(-tmp_qloop_65*tmp_qloop_95 + tmp_qloop_66 + tmp_qloop_67*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_2_2 = tmp_qloop_62*(-tmp_qloop_70*tmp_qloop_95 + tmp_qloop_71 + tmp_qloop_72*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_2_3 = tmp_qloop_62*(-tmp_qloop_75*tmp_qloop_95 + tmp_qloop_76 + tmp_qloop_77*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_2_4 = tmp_qloop_62*(-tmp_qloop_80*tmp_qloop_95 + tmp_qloop_81 + tmp_qloop_82*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_2_5 = tmp_qloop_62*(-tmp_qloop_85*tmp_qloop_95 + tmp_qloop_86 + tmp_qloop_87*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_3_0 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_32 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_43 + tmp_qloop_41);
+                const real_t q_tmp_3_1 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_65 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_3_2 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_70 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_72 + tmp_qloop_71);
+                const real_t q_tmp_3_3 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_75 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_3_4 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_80 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_82 + tmp_qloop_81);
+                const real_t q_tmp_3_5 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_85 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_87 + tmp_qloop_86);
+                const real_t q_tmp_4_0 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_32 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_43 + tmp_qloop_41);
+                const real_t q_tmp_4_1 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_65 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_4_2 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_70 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_72 + tmp_qloop_71);
+                const real_t q_tmp_4_3 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_75 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_4_4 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_80 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_82 + tmp_qloop_81);
+                const real_t q_tmp_4_5 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_85 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_87 + tmp_qloop_86);
+                const real_t q_tmp_5_0 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_32 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_43 + tmp_qloop_41);
+                const real_t q_tmp_5_1 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_65 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_5_2 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_70 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_72 + tmp_qloop_71);
+                const real_t q_tmp_5_3 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_75 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_5_4 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_80 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_82 + tmp_qloop_81);
+                const real_t q_tmp_5_5 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_85 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_87 + tmp_qloop_86);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9b6d0ca7f0f25f569d77fd3e604b789d361d7daa
--- /dev/null
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_macro_2D.cpp
@@ -0,0 +1,845 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseFullStokesAnnulusMap_0_1.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseFullStokesAnnulusMap_0_1::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (-tmp_qloop_1*tmp_qloop_11 + tmp_qloop_13*tmp_qloop_14);
+       const real_t tmp_qloop_53 = tmp_qloop_15*1.0 / (tmp_qloop_0*tmp_qloop_11 - tmp_qloop_12*tmp_qloop_14);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_3 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_qloop_6 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_7 = p_affine_0_1 - p_affine_2_1;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_0 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_5 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_8 = p_affine_0_1 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                const real_t tmp_qloop_10 = tmp_qloop_5 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_4) - tmp_qloop_13*(-rayVertex_1 + tmp_qloop_8));
+                const real_t tmp_qloop_21 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_1*tmp_qloop_18 + tmp_qloop_21*tmp_qloop_9;
+                const real_t tmp_qloop_23 = tmp_qloop_17*tmp_qloop_8;
+                const real_t tmp_qloop_24 = -tmp_qloop_13*tmp_qloop_23 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_5*1.0;
+                const real_t tmp_qloop_25 = tmp_qloop_13*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_1*tmp_qloop_23 - tmp_qloop_21*tmp_qloop_4*tmp_qloop_8;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_22*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_22*tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q];
+                const real_t tmp_qloop_30 = tmp_qloop_25*tmp_qloop_27;
+                const real_t tmp_qloop_31 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q];
+                const real_t tmp_qloop_32 = tmp_qloop_29 + tmp_qloop_31;
+                const real_t tmp_qloop_33 = tmp_qloop_24*tmp_qloop_27;
+                const real_t tmp_qloop_34 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q];
+                const real_t tmp_qloop_35 = -tmp_qloop_26*tmp_qloop_27;
+                const real_t tmp_qloop_36 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q];
+                const real_t tmp_qloop_37 = tmp_qloop_34*0.66666666666666667 + tmp_qloop_36*0.66666666666666667;
+                const real_t tmp_qloop_38 = tmp_qloop_33*2.0*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q] + tmp_qloop_35*2.0*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q];
+                const real_t tmp_qloop_39 = tmp_qloop_38*(tmp_qloop_34*1.0 + tmp_qloop_36*1.0);
+                const real_t tmp_qloop_40 = tmp_qloop_28*1.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q] + tmp_qloop_30*1.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q];
+                const real_t tmp_qloop_41 = tmp_qloop_40*(tmp_qloop_29*2.0 + tmp_qloop_31*2.0);
+                const real_t tmp_qloop_42 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q] + tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q];
+                const real_t tmp_qloop_43 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_42;
+                const real_t tmp_qloop_44 = tmp_qloop_28*0.5;
+                const real_t tmp_qloop_45 = tmp_qloop_30*0.5;
+                const real_t tmp_qloop_46 = tmp_qloop_33*0.5*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[q] + tmp_qloop_35*0.5*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[q];
+                const real_t tmp_qloop_47 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q] + tmp_qloop_46*2.0;
+                const real_t tmp_qloop_48 = -p_affine_0_0 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                const real_t tmp_qloop_50 = -p_affine_0_1 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_51 = (tmp_qloop_50*tmp_qloop_50);
+                const real_t tmp_qloop_52 = tmp_qloop_49 + tmp_qloop_51;
+                const real_t tmp_qloop_54 = pow(tmp_qloop_52, -0.50000000000000000)*tmp_qloop_53*1.0;
+                const real_t tmp_qloop_55 = tmp_qloop_48*tmp_qloop_54;
+                const real_t tmp_qloop_56 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_48) + tmp_qloop_12*(rayVertex_1 + tmp_qloop_50);
+                const real_t tmp_qloop_57 = pow(tmp_qloop_52, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_58 = tmp_qloop_57*(radRayVertex + tmp_qloop_53*tmp_qloop_56);
+                const real_t tmp_qloop_59 = tmp_qloop_50*tmp_qloop_54;
+                const real_t tmp_qloop_60 = tmp_qloop_57*(radRayVertex + tmp_qloop_53*tmp_qloop_56);
+                const real_t tmp_qloop_61 = tmp_qloop_48*tmp_qloop_50;
+                const real_t tmp_qloop_62 = abs_det_jac_affine_GRAY*(mu_dof_0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*_data_phi_0_0_GRAY[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_55 - tmp_qloop_51*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_59 + tmp_qloop_49*tmp_qloop_60) - (tmp_qloop_0*tmp_qloop_59 + tmp_qloop_58*tmp_qloop_61)*(tmp_qloop_12*tmp_qloop_55 - tmp_qloop_60*tmp_qloop_61))*_data_q_w[q];
+                const real_t tmp_qloop_63 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1];
+                const real_t tmp_qloop_64 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1];
+                const real_t tmp_qloop_65 = tmp_qloop_63 + tmp_qloop_64;
+                const real_t tmp_qloop_66 = tmp_qloop_40*(tmp_qloop_63*2.0 + tmp_qloop_64*2.0);
+                const real_t tmp_qloop_67 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_42;
+                const real_t tmp_qloop_68 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2];
+                const real_t tmp_qloop_69 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2];
+                const real_t tmp_qloop_70 = tmp_qloop_68 + tmp_qloop_69;
+                const real_t tmp_qloop_71 = tmp_qloop_40*(tmp_qloop_68*2.0 + tmp_qloop_69*2.0);
+                const real_t tmp_qloop_72 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_42;
+                const real_t tmp_qloop_73 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3];
+                const real_t tmp_qloop_74 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3];
+                const real_t tmp_qloop_75 = tmp_qloop_73 + tmp_qloop_74;
+                const real_t tmp_qloop_76 = tmp_qloop_40*(tmp_qloop_73*2.0 + tmp_qloop_74*2.0);
+                const real_t tmp_qloop_77 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_42;
+                const real_t tmp_qloop_78 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4];
+                const real_t tmp_qloop_79 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4];
+                const real_t tmp_qloop_80 = tmp_qloop_78 + tmp_qloop_79;
+                const real_t tmp_qloop_81 = tmp_qloop_40*(tmp_qloop_78*2.0 + tmp_qloop_79*2.0);
+                const real_t tmp_qloop_82 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_42;
+                const real_t tmp_qloop_83 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5];
+                const real_t tmp_qloop_84 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5];
+                const real_t tmp_qloop_85 = tmp_qloop_83 + tmp_qloop_84;
+                const real_t tmp_qloop_86 = tmp_qloop_40*(tmp_qloop_83*2.0 + tmp_qloop_84*2.0);
+                const real_t tmp_qloop_87 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_42;
+                const real_t tmp_qloop_88 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1];
+                const real_t tmp_qloop_89 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1];
+                const real_t tmp_qloop_90 = tmp_qloop_88*0.66666666666666667 + tmp_qloop_89*0.66666666666666667;
+                const real_t tmp_qloop_91 = tmp_qloop_38*(tmp_qloop_88*1.0 + tmp_qloop_89*1.0);
+                const real_t tmp_qloop_92 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 1] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 1] + tmp_qloop_46*2.0;
+                const real_t tmp_qloop_93 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2];
+                const real_t tmp_qloop_94 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2];
+                const real_t tmp_qloop_95 = tmp_qloop_93*0.66666666666666667 + tmp_qloop_94*0.66666666666666667;
+                const real_t tmp_qloop_96 = tmp_qloop_38*(tmp_qloop_93*1.0 + tmp_qloop_94*1.0);
+                const real_t tmp_qloop_97 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 2] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 2] + tmp_qloop_46*2.0;
+                const real_t tmp_qloop_98 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3];
+                const real_t tmp_qloop_99 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3];
+                const real_t tmp_qloop_100 = tmp_qloop_98*0.66666666666666667 + tmp_qloop_99*0.66666666666666667;
+                const real_t tmp_qloop_101 = tmp_qloop_38*(tmp_qloop_98*1.0 + tmp_qloop_99*1.0);
+                const real_t tmp_qloop_102 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 3] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 3] + tmp_qloop_46*2.0;
+                const real_t tmp_qloop_103 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4];
+                const real_t tmp_qloop_104 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4];
+                const real_t tmp_qloop_105 = tmp_qloop_103*0.66666666666666667 + tmp_qloop_104*0.66666666666666667;
+                const real_t tmp_qloop_106 = tmp_qloop_38*(tmp_qloop_103*1.0 + tmp_qloop_104*1.0);
+                const real_t tmp_qloop_107 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 4] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 4] + tmp_qloop_46*2.0;
+                const real_t tmp_qloop_108 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_109 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_110 = tmp_qloop_108*0.66666666666666667 + tmp_qloop_109*0.66666666666666667;
+                const real_t tmp_qloop_111 = tmp_qloop_38*(tmp_qloop_108*1.0 + tmp_qloop_109*1.0);
+                const real_t tmp_qloop_112 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[6*q + 5] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[6*q + 5] + tmp_qloop_46*2.0;
+                const real_t q_tmp_0_0 = tmp_qloop_62*(-tmp_qloop_32*tmp_qloop_37 + tmp_qloop_39 + tmp_qloop_41 + tmp_qloop_43*tmp_qloop_47);
+                const real_t q_tmp_0_1 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_65 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_0_2 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_70 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_72 + tmp_qloop_71);
+                const real_t q_tmp_0_3 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_75 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_0_4 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_80 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_82 + tmp_qloop_81);
+                const real_t q_tmp_0_5 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_85 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_87 + tmp_qloop_86);
+                const real_t q_tmp_1_0 = tmp_qloop_62*(-tmp_qloop_32*tmp_qloop_90 + tmp_qloop_41 + tmp_qloop_43*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_1_1 = tmp_qloop_62*(-tmp_qloop_65*tmp_qloop_90 + tmp_qloop_66 + tmp_qloop_67*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_1_2 = tmp_qloop_62*(-tmp_qloop_70*tmp_qloop_90 + tmp_qloop_71 + tmp_qloop_72*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_1_3 = tmp_qloop_62*(-tmp_qloop_75*tmp_qloop_90 + tmp_qloop_76 + tmp_qloop_77*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_1_4 = tmp_qloop_62*(-tmp_qloop_80*tmp_qloop_90 + tmp_qloop_81 + tmp_qloop_82*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_1_5 = tmp_qloop_62*(-tmp_qloop_85*tmp_qloop_90 + tmp_qloop_86 + tmp_qloop_87*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_2_0 = tmp_qloop_62*(-tmp_qloop_32*tmp_qloop_95 + tmp_qloop_41 + tmp_qloop_43*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_2_1 = tmp_qloop_62*(-tmp_qloop_65*tmp_qloop_95 + tmp_qloop_66 + tmp_qloop_67*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_2_2 = tmp_qloop_62*(-tmp_qloop_70*tmp_qloop_95 + tmp_qloop_71 + tmp_qloop_72*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_2_3 = tmp_qloop_62*(-tmp_qloop_75*tmp_qloop_95 + tmp_qloop_76 + tmp_qloop_77*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_2_4 = tmp_qloop_62*(-tmp_qloop_80*tmp_qloop_95 + tmp_qloop_81 + tmp_qloop_82*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_2_5 = tmp_qloop_62*(-tmp_qloop_85*tmp_qloop_95 + tmp_qloop_86 + tmp_qloop_87*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_3_0 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_32 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_43 + tmp_qloop_41);
+                const real_t q_tmp_3_1 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_65 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_3_2 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_70 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_72 + tmp_qloop_71);
+                const real_t q_tmp_3_3 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_75 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_3_4 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_80 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_82 + tmp_qloop_81);
+                const real_t q_tmp_3_5 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_85 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_87 + tmp_qloop_86);
+                const real_t q_tmp_4_0 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_32 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_43 + tmp_qloop_41);
+                const real_t q_tmp_4_1 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_65 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_4_2 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_70 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_72 + tmp_qloop_71);
+                const real_t q_tmp_4_3 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_75 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_4_4 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_80 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_82 + tmp_qloop_81);
+                const real_t q_tmp_4_5 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_85 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_87 + tmp_qloop_86);
+                const real_t q_tmp_5_0 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_32 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_43 + tmp_qloop_41);
+                const real_t q_tmp_5_1 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_65 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_5_2 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_70 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_72 + tmp_qloop_71);
+                const real_t q_tmp_5_3 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_75 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_5_4 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_80 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_82 + tmp_qloop_81);
+                const real_t q_tmp_5_5 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_85 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_87 + tmp_qloop_86);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_1_0;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_2_0;
+             const real_t elMat_2_1 = q_acc_2_1;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_3_0;
+             const real_t elMat_3_1 = q_acc_3_1;
+             const real_t elMat_3_2 = q_acc_3_2;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_4_0;
+             const real_t elMat_4_1 = q_acc_4_1;
+             const real_t elMat_4_2 = q_acc_4_2;
+             const real_t elMat_4_3 = q_acc_4_3;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_5_0;
+             const real_t elMat_5_1 = q_acc_5_1;
+             const real_t elMat_5_2 = q_acc_5_2;
+             const real_t elMat_5_3 = q_acc_5_3;
+             const real_t elMat_5_4 = q_acc_5_4;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_3 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_qloop_6 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_7 = p_affine_0_1 - p_affine_2_1;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_0 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_5 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_8 = p_affine_0_1 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                const real_t tmp_qloop_10 = tmp_qloop_5 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_4) - tmp_qloop_13*(-rayVertex_1 + tmp_qloop_8));
+                const real_t tmp_qloop_21 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_1*tmp_qloop_18 + tmp_qloop_21*tmp_qloop_9;
+                const real_t tmp_qloop_23 = tmp_qloop_17*tmp_qloop_8;
+                const real_t tmp_qloop_24 = -tmp_qloop_13*tmp_qloop_23 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_5*1.0;
+                const real_t tmp_qloop_25 = tmp_qloop_13*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_1*tmp_qloop_23 - tmp_qloop_21*tmp_qloop_4*tmp_qloop_8;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_22*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_22*tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q];
+                const real_t tmp_qloop_30 = tmp_qloop_25*tmp_qloop_27;
+                const real_t tmp_qloop_31 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q];
+                const real_t tmp_qloop_32 = tmp_qloop_29 + tmp_qloop_31;
+                const real_t tmp_qloop_33 = tmp_qloop_24*tmp_qloop_27;
+                const real_t tmp_qloop_34 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q];
+                const real_t tmp_qloop_35 = -tmp_qloop_26*tmp_qloop_27;
+                const real_t tmp_qloop_36 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q];
+                const real_t tmp_qloop_37 = tmp_qloop_34*0.66666666666666667 + tmp_qloop_36*0.66666666666666667;
+                const real_t tmp_qloop_38 = tmp_qloop_33*2.0*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q] + tmp_qloop_35*2.0*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q];
+                const real_t tmp_qloop_39 = tmp_qloop_38*(tmp_qloop_34*1.0 + tmp_qloop_36*1.0);
+                const real_t tmp_qloop_40 = tmp_qloop_28*1.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q] + tmp_qloop_30*1.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q];
+                const real_t tmp_qloop_41 = tmp_qloop_40*(tmp_qloop_29*2.0 + tmp_qloop_31*2.0);
+                const real_t tmp_qloop_42 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q] + tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q];
+                const real_t tmp_qloop_43 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_42;
+                const real_t tmp_qloop_44 = tmp_qloop_28*0.5;
+                const real_t tmp_qloop_45 = tmp_qloop_30*0.5;
+                const real_t tmp_qloop_46 = tmp_qloop_33*0.5*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[q] + tmp_qloop_35*0.5*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[q];
+                const real_t tmp_qloop_47 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q] + tmp_qloop_46*2.0;
+                const real_t tmp_qloop_48 = -p_affine_0_0 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_49 = (tmp_qloop_48*tmp_qloop_48);
+                const real_t tmp_qloop_50 = -p_affine_0_1 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_51 = (tmp_qloop_50*tmp_qloop_50);
+                const real_t tmp_qloop_52 = tmp_qloop_49 + tmp_qloop_51;
+                const real_t tmp_qloop_54 = pow(tmp_qloop_52, -0.50000000000000000)*tmp_qloop_53*1.0;
+                const real_t tmp_qloop_55 = tmp_qloop_48*tmp_qloop_54;
+                const real_t tmp_qloop_56 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_48) + tmp_qloop_12*(rayVertex_1 + tmp_qloop_50);
+                const real_t tmp_qloop_57 = pow(tmp_qloop_52, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_58 = tmp_qloop_57*(radRayVertex + tmp_qloop_53*tmp_qloop_56);
+                const real_t tmp_qloop_59 = tmp_qloop_50*tmp_qloop_54;
+                const real_t tmp_qloop_60 = tmp_qloop_57*(radRayVertex + tmp_qloop_53*tmp_qloop_56);
+                const real_t tmp_qloop_61 = tmp_qloop_48*tmp_qloop_50;
+                const real_t tmp_qloop_62 = abs_det_jac_affine_BLUE*(mu_dof_0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*_data_phi_0_0_BLUE[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_55 - tmp_qloop_51*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_59 + tmp_qloop_49*tmp_qloop_60) - (tmp_qloop_0*tmp_qloop_59 + tmp_qloop_58*tmp_qloop_61)*(tmp_qloop_12*tmp_qloop_55 - tmp_qloop_60*tmp_qloop_61))*_data_q_w[q];
+                const real_t tmp_qloop_63 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1];
+                const real_t tmp_qloop_64 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1];
+                const real_t tmp_qloop_65 = tmp_qloop_63 + tmp_qloop_64;
+                const real_t tmp_qloop_66 = tmp_qloop_40*(tmp_qloop_63*2.0 + tmp_qloop_64*2.0);
+                const real_t tmp_qloop_67 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_42;
+                const real_t tmp_qloop_68 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2];
+                const real_t tmp_qloop_69 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2];
+                const real_t tmp_qloop_70 = tmp_qloop_68 + tmp_qloop_69;
+                const real_t tmp_qloop_71 = tmp_qloop_40*(tmp_qloop_68*2.0 + tmp_qloop_69*2.0);
+                const real_t tmp_qloop_72 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_42;
+                const real_t tmp_qloop_73 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3];
+                const real_t tmp_qloop_74 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3];
+                const real_t tmp_qloop_75 = tmp_qloop_73 + tmp_qloop_74;
+                const real_t tmp_qloop_76 = tmp_qloop_40*(tmp_qloop_73*2.0 + tmp_qloop_74*2.0);
+                const real_t tmp_qloop_77 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_42;
+                const real_t tmp_qloop_78 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4];
+                const real_t tmp_qloop_79 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4];
+                const real_t tmp_qloop_80 = tmp_qloop_78 + tmp_qloop_79;
+                const real_t tmp_qloop_81 = tmp_qloop_40*(tmp_qloop_78*2.0 + tmp_qloop_79*2.0);
+                const real_t tmp_qloop_82 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_42;
+                const real_t tmp_qloop_83 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5];
+                const real_t tmp_qloop_84 = tmp_qloop_30*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5];
+                const real_t tmp_qloop_85 = tmp_qloop_83 + tmp_qloop_84;
+                const real_t tmp_qloop_86 = tmp_qloop_40*(tmp_qloop_83*2.0 + tmp_qloop_84*2.0);
+                const real_t tmp_qloop_87 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5] + tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_42;
+                const real_t tmp_qloop_88 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1];
+                const real_t tmp_qloop_89 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1];
+                const real_t tmp_qloop_90 = tmp_qloop_88*0.66666666666666667 + tmp_qloop_89*0.66666666666666667;
+                const real_t tmp_qloop_91 = tmp_qloop_38*(tmp_qloop_88*1.0 + tmp_qloop_89*1.0);
+                const real_t tmp_qloop_92 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 1] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 1] + tmp_qloop_46*2.0;
+                const real_t tmp_qloop_93 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2];
+                const real_t tmp_qloop_94 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2];
+                const real_t tmp_qloop_95 = tmp_qloop_93*0.66666666666666667 + tmp_qloop_94*0.66666666666666667;
+                const real_t tmp_qloop_96 = tmp_qloop_38*(tmp_qloop_93*1.0 + tmp_qloop_94*1.0);
+                const real_t tmp_qloop_97 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 2] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 2] + tmp_qloop_46*2.0;
+                const real_t tmp_qloop_98 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3];
+                const real_t tmp_qloop_99 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3];
+                const real_t tmp_qloop_100 = tmp_qloop_98*0.66666666666666667 + tmp_qloop_99*0.66666666666666667;
+                const real_t tmp_qloop_101 = tmp_qloop_38*(tmp_qloop_98*1.0 + tmp_qloop_99*1.0);
+                const real_t tmp_qloop_102 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 3] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 3] + tmp_qloop_46*2.0;
+                const real_t tmp_qloop_103 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4];
+                const real_t tmp_qloop_104 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4];
+                const real_t tmp_qloop_105 = tmp_qloop_103*0.66666666666666667 + tmp_qloop_104*0.66666666666666667;
+                const real_t tmp_qloop_106 = tmp_qloop_38*(tmp_qloop_103*1.0 + tmp_qloop_104*1.0);
+                const real_t tmp_qloop_107 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 4] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 4] + tmp_qloop_46*2.0;
+                const real_t tmp_qloop_108 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_109 = tmp_qloop_35*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_110 = tmp_qloop_108*0.66666666666666667 + tmp_qloop_109*0.66666666666666667;
+                const real_t tmp_qloop_111 = tmp_qloop_38*(tmp_qloop_108*1.0 + tmp_qloop_109*1.0);
+                const real_t tmp_qloop_112 = tmp_qloop_44*2.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[6*q + 5] + tmp_qloop_45*2.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[6*q + 5] + tmp_qloop_46*2.0;
+                const real_t q_tmp_0_0 = tmp_qloop_62*(-tmp_qloop_32*tmp_qloop_37 + tmp_qloop_39 + tmp_qloop_41 + tmp_qloop_43*tmp_qloop_47);
+                const real_t q_tmp_0_1 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_65 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_0_2 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_70 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_72 + tmp_qloop_71);
+                const real_t q_tmp_0_3 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_75 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_0_4 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_80 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_82 + tmp_qloop_81);
+                const real_t q_tmp_0_5 = tmp_qloop_62*(-tmp_qloop_37*tmp_qloop_85 + tmp_qloop_39 + tmp_qloop_47*tmp_qloop_87 + tmp_qloop_86);
+                const real_t q_tmp_1_0 = tmp_qloop_62*(-tmp_qloop_32*tmp_qloop_90 + tmp_qloop_41 + tmp_qloop_43*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_1_1 = tmp_qloop_62*(-tmp_qloop_65*tmp_qloop_90 + tmp_qloop_66 + tmp_qloop_67*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_1_2 = tmp_qloop_62*(-tmp_qloop_70*tmp_qloop_90 + tmp_qloop_71 + tmp_qloop_72*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_1_3 = tmp_qloop_62*(-tmp_qloop_75*tmp_qloop_90 + tmp_qloop_76 + tmp_qloop_77*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_1_4 = tmp_qloop_62*(-tmp_qloop_80*tmp_qloop_90 + tmp_qloop_81 + tmp_qloop_82*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_1_5 = tmp_qloop_62*(-tmp_qloop_85*tmp_qloop_90 + tmp_qloop_86 + tmp_qloop_87*tmp_qloop_92 + tmp_qloop_91);
+                const real_t q_tmp_2_0 = tmp_qloop_62*(-tmp_qloop_32*tmp_qloop_95 + tmp_qloop_41 + tmp_qloop_43*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_2_1 = tmp_qloop_62*(-tmp_qloop_65*tmp_qloop_95 + tmp_qloop_66 + tmp_qloop_67*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_2_2 = tmp_qloop_62*(-tmp_qloop_70*tmp_qloop_95 + tmp_qloop_71 + tmp_qloop_72*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_2_3 = tmp_qloop_62*(-tmp_qloop_75*tmp_qloop_95 + tmp_qloop_76 + tmp_qloop_77*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_2_4 = tmp_qloop_62*(-tmp_qloop_80*tmp_qloop_95 + tmp_qloop_81 + tmp_qloop_82*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_2_5 = tmp_qloop_62*(-tmp_qloop_85*tmp_qloop_95 + tmp_qloop_86 + tmp_qloop_87*tmp_qloop_97 + tmp_qloop_96);
+                const real_t q_tmp_3_0 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_32 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_43 + tmp_qloop_41);
+                const real_t q_tmp_3_1 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_65 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_3_2 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_70 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_72 + tmp_qloop_71);
+                const real_t q_tmp_3_3 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_75 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_3_4 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_80 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_82 + tmp_qloop_81);
+                const real_t q_tmp_3_5 = tmp_qloop_62*(-tmp_qloop_100*tmp_qloop_85 + tmp_qloop_101 + tmp_qloop_102*tmp_qloop_87 + tmp_qloop_86);
+                const real_t q_tmp_4_0 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_32 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_43 + tmp_qloop_41);
+                const real_t q_tmp_4_1 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_65 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_4_2 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_70 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_72 + tmp_qloop_71);
+                const real_t q_tmp_4_3 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_75 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_4_4 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_80 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_82 + tmp_qloop_81);
+                const real_t q_tmp_4_5 = tmp_qloop_62*(-tmp_qloop_105*tmp_qloop_85 + tmp_qloop_106 + tmp_qloop_107*tmp_qloop_87 + tmp_qloop_86);
+                const real_t q_tmp_5_0 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_32 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_43 + tmp_qloop_41);
+                const real_t q_tmp_5_1 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_65 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_67 + tmp_qloop_66);
+                const real_t q_tmp_5_2 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_70 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_72 + tmp_qloop_71);
+                const real_t q_tmp_5_3 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_75 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_77 + tmp_qloop_76);
+                const real_t q_tmp_5_4 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_80 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_82 + tmp_qloop_81);
+                const real_t q_tmp_5_5 = tmp_qloop_62*(-tmp_qloop_110*tmp_qloop_85 + tmp_qloop_111 + tmp_qloop_112*tmp_qloop_87 + tmp_qloop_86);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_1_0;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_2_0;
+             const real_t elMat_2_1 = q_acc_2_1;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_3_0;
+             const real_t elMat_3_1 = q_acc_3_1;
+             const real_t elMat_3_2 = q_acc_3_2;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_4_0;
+             const real_t elMat_4_1 = q_acc_4_1;
+             const real_t elMat_4_2 = q_acc_4_2;
+             const real_t elMat_4_3 = q_acc_4_3;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_5_0;
+             const real_t elMat_5_1 = q_acc_5_1;
+             const real_t elMat_5_2 = q_acc_5_2;
+             const real_t elMat_5_3 = q_acc_5_3;
+             const real_t elMat_5_4 = q_acc_5_4;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e376c3e9f6421adeb97adb68255b97aae8499c11
--- /dev/null
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp
@@ -0,0 +1,691 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseFullStokesAnnulusMap_1_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseFullStokesAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (tmp_qloop_1*tmp_qloop_14 - tmp_qloop_11*tmp_qloop_13);
+       const real_t tmp_qloop_55 = tmp_qloop_15*1.0 / (-tmp_qloop_0*tmp_qloop_14 + tmp_qloop_11*tmp_qloop_12);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_21*tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q];
+                const real_t tmp_qloop_30 = -tmp_qloop_26;
+                const real_t tmp_qloop_31 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q];
+                const real_t tmp_qloop_32 = tmp_qloop_30*tmp_qloop_31;
+                const real_t tmp_qloop_33 = tmp_qloop_29 + tmp_qloop_32;
+                const real_t tmp_qloop_34 = tmp_qloop_24*tmp_qloop_27;
+                const real_t tmp_qloop_35 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q];
+                const real_t tmp_qloop_36 = tmp_qloop_25*tmp_qloop_27;
+                const real_t tmp_qloop_37 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q];
+                const real_t tmp_qloop_38 = tmp_qloop_35*0.66666666666666667 + tmp_qloop_37*0.66666666666666667;
+                const real_t tmp_qloop_39 = tmp_qloop_27*tmp_qloop_30;
+                const real_t tmp_qloop_40 = tmp_qloop_28*1.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q] + tmp_qloop_39*1.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q];
+                const real_t tmp_qloop_41 = tmp_qloop_40*(tmp_qloop_29*2.0 + tmp_qloop_32*2.0);
+                const real_t tmp_qloop_42 = tmp_qloop_34*2.0*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q] + tmp_qloop_36*2.0*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q];
+                const real_t tmp_qloop_43 = tmp_qloop_42*(tmp_qloop_35*1.0 + tmp_qloop_37*1.0);
+                const real_t tmp_qloop_44 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q];
+                const real_t tmp_qloop_45 = tmp_qloop_24*tmp_qloop_31 + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_44;
+                const real_t tmp_qloop_46 = tmp_qloop_28*0.5;
+                const real_t tmp_qloop_47 = tmp_qloop_39*0.5;
+                const real_t tmp_qloop_48 = tmp_qloop_34*0.5*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q] + tmp_qloop_36*0.5*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q];
+                const real_t tmp_qloop_49 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_48*2.0;
+                const real_t tmp_qloop_50 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_51 = (tmp_qloop_50*tmp_qloop_50);
+                const real_t tmp_qloop_52 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_53 = (tmp_qloop_52*tmp_qloop_52);
+                const real_t tmp_qloop_54 = tmp_qloop_51 + tmp_qloop_53;
+                const real_t tmp_qloop_56 = pow(tmp_qloop_54, -0.50000000000000000)*tmp_qloop_55*1.0;
+                const real_t tmp_qloop_57 = tmp_qloop_50*tmp_qloop_56;
+                const real_t tmp_qloop_58 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_52) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_50);
+                const real_t tmp_qloop_59 = pow(tmp_qloop_54, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_60 = tmp_qloop_59*(radRayVertex + tmp_qloop_55*tmp_qloop_58);
+                const real_t tmp_qloop_61 = tmp_qloop_52*tmp_qloop_56;
+                const real_t tmp_qloop_62 = tmp_qloop_59*(radRayVertex + tmp_qloop_55*tmp_qloop_58);
+                const real_t tmp_qloop_63 = tmp_qloop_50*tmp_qloop_52;
+                const real_t tmp_qloop_64 = abs_det_jac_affine_GRAY*(mu_dof_0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*_data_phi_0_0_GRAY[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_57 - tmp_qloop_62*tmp_qloop_63)*(tmp_qloop_12*tmp_qloop_61 + tmp_qloop_60*tmp_qloop_63) - (tmp_qloop_0*tmp_qloop_61 + tmp_qloop_51*tmp_qloop_62)*(tmp_qloop_12*tmp_qloop_57 - tmp_qloop_53*tmp_qloop_60))*_data_q_w[q];
+                const real_t tmp_qloop_65 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1];
+                const real_t tmp_qloop_66 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1];
+                const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66;
+                const real_t tmp_qloop_68 = tmp_qloop_40*(tmp_qloop_65*2.0 + tmp_qloop_66*2.0);
+                const real_t tmp_qloop_69 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_44;
+                const real_t tmp_qloop_70 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2];
+                const real_t tmp_qloop_71 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2];
+                const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71;
+                const real_t tmp_qloop_73 = tmp_qloop_40*(tmp_qloop_70*2.0 + tmp_qloop_71*2.0);
+                const real_t tmp_qloop_74 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_44;
+                const real_t tmp_qloop_75 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3];
+                const real_t tmp_qloop_76 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3];
+                const real_t tmp_qloop_77 = tmp_qloop_75 + tmp_qloop_76;
+                const real_t tmp_qloop_78 = tmp_qloop_40*(tmp_qloop_75*2.0 + tmp_qloop_76*2.0);
+                const real_t tmp_qloop_79 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_44;
+                const real_t tmp_qloop_80 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4];
+                const real_t tmp_qloop_81 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4];
+                const real_t tmp_qloop_82 = tmp_qloop_80 + tmp_qloop_81;
+                const real_t tmp_qloop_83 = tmp_qloop_40*(tmp_qloop_80*2.0 + tmp_qloop_81*2.0);
+                const real_t tmp_qloop_84 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_44;
+                const real_t tmp_qloop_85 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_86 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_87 = tmp_qloop_85 + tmp_qloop_86;
+                const real_t tmp_qloop_88 = tmp_qloop_40*(tmp_qloop_85*2.0 + tmp_qloop_86*2.0);
+                const real_t tmp_qloop_89 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_44;
+                const real_t tmp_qloop_90 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1];
+                const real_t tmp_qloop_91 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1];
+                const real_t tmp_qloop_92 = tmp_qloop_90*0.66666666666666667 + tmp_qloop_91*0.66666666666666667;
+                const real_t tmp_qloop_93 = tmp_qloop_42*(tmp_qloop_90*1.0 + tmp_qloop_91*1.0);
+                const real_t tmp_qloop_94 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_48*2.0;
+                const real_t tmp_qloop_95 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2];
+                const real_t tmp_qloop_96 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2];
+                const real_t tmp_qloop_97 = tmp_qloop_95*0.66666666666666667 + tmp_qloop_96*0.66666666666666667;
+                const real_t tmp_qloop_98 = tmp_qloop_42*(tmp_qloop_95*1.0 + tmp_qloop_96*1.0);
+                const real_t tmp_qloop_99 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_48*2.0;
+                const real_t tmp_qloop_100 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3];
+                const real_t tmp_qloop_101 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3];
+                const real_t tmp_qloop_102 = tmp_qloop_100*0.66666666666666667 + tmp_qloop_101*0.66666666666666667;
+                const real_t tmp_qloop_103 = tmp_qloop_42*(tmp_qloop_100*1.0 + tmp_qloop_101*1.0);
+                const real_t tmp_qloop_104 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_48*2.0;
+                const real_t tmp_qloop_105 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4];
+                const real_t tmp_qloop_106 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4];
+                const real_t tmp_qloop_107 = tmp_qloop_105*0.66666666666666667 + tmp_qloop_106*0.66666666666666667;
+                const real_t tmp_qloop_108 = tmp_qloop_42*(tmp_qloop_105*1.0 + tmp_qloop_106*1.0);
+                const real_t tmp_qloop_109 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_48*2.0;
+                const real_t tmp_qloop_110 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5];
+                const real_t tmp_qloop_111 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5];
+                const real_t tmp_qloop_112 = tmp_qloop_110*0.66666666666666667 + tmp_qloop_111*0.66666666666666667;
+                const real_t tmp_qloop_113 = tmp_qloop_42*(tmp_qloop_110*1.0 + tmp_qloop_111*1.0);
+                const real_t tmp_qloop_114 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_48*2.0;
+                const real_t q_tmp_0_0 = tmp_qloop_64*(-tmp_qloop_33*tmp_qloop_38 + tmp_qloop_41 + tmp_qloop_43 + tmp_qloop_45*tmp_qloop_49);
+                const real_t q_tmp_0_1 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_67 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_0_2 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_72 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_74 + tmp_qloop_73);
+                const real_t q_tmp_0_3 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_77 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_0_4 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_82 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_84 + tmp_qloop_83);
+                const real_t q_tmp_0_5 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_87 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_89 + tmp_qloop_88);
+                const real_t q_tmp_1_0 = tmp_qloop_64*(-tmp_qloop_33*tmp_qloop_92 + tmp_qloop_41 + tmp_qloop_45*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_1_1 = tmp_qloop_64*(-tmp_qloop_67*tmp_qloop_92 + tmp_qloop_68 + tmp_qloop_69*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_1_2 = tmp_qloop_64*(-tmp_qloop_72*tmp_qloop_92 + tmp_qloop_73 + tmp_qloop_74*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_1_3 = tmp_qloop_64*(-tmp_qloop_77*tmp_qloop_92 + tmp_qloop_78 + tmp_qloop_79*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_1_4 = tmp_qloop_64*(-tmp_qloop_82*tmp_qloop_92 + tmp_qloop_83 + tmp_qloop_84*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_1_5 = tmp_qloop_64*(-tmp_qloop_87*tmp_qloop_92 + tmp_qloop_88 + tmp_qloop_89*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_2_0 = tmp_qloop_64*(-tmp_qloop_33*tmp_qloop_97 + tmp_qloop_41 + tmp_qloop_45*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_2_1 = tmp_qloop_64*(-tmp_qloop_67*tmp_qloop_97 + tmp_qloop_68 + tmp_qloop_69*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_2_2 = tmp_qloop_64*(-tmp_qloop_72*tmp_qloop_97 + tmp_qloop_73 + tmp_qloop_74*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_2_3 = tmp_qloop_64*(-tmp_qloop_77*tmp_qloop_97 + tmp_qloop_78 + tmp_qloop_79*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_2_4 = tmp_qloop_64*(-tmp_qloop_82*tmp_qloop_97 + tmp_qloop_83 + tmp_qloop_84*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_2_5 = tmp_qloop_64*(-tmp_qloop_87*tmp_qloop_97 + tmp_qloop_88 + tmp_qloop_89*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_3_0 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_33 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_45 + tmp_qloop_41);
+                const real_t q_tmp_3_1 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_67 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_3_2 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_72 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_74 + tmp_qloop_73);
+                const real_t q_tmp_3_3 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_77 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_4 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_82 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_84 + tmp_qloop_83);
+                const real_t q_tmp_3_5 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_87 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_89 + tmp_qloop_88);
+                const real_t q_tmp_4_0 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_33 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_45 + tmp_qloop_41);
+                const real_t q_tmp_4_1 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_67 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_4_2 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_72 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_74 + tmp_qloop_73);
+                const real_t q_tmp_4_3 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_77 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_4_4 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_82 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_84 + tmp_qloop_83);
+                const real_t q_tmp_4_5 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_87 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_89 + tmp_qloop_88);
+                const real_t q_tmp_5_0 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_33 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_45 + tmp_qloop_41);
+                const real_t q_tmp_5_1 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_67 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_5_2 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_72 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_74 + tmp_qloop_73);
+                const real_t q_tmp_5_3 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_77 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_5_4 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_82 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_84 + tmp_qloop_83);
+                const real_t q_tmp_5_5 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_87 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_89 + tmp_qloop_88);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; /* BLUE affine geometry setup: reciprocal of micro_edges_per_macro_edge_float, used to scale the macro-vertex differences below */
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); /* component 0 of macro vertex 0 plus the scaled difference (macro vertex 1 - macro vertex 0) */
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); /* same offset point, component 1 */
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); /* component 0 of the scaled difference (macro vertex 2 - macro vertex 0) */
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); /* component 1 of the scaled difference (macro vertex 2 - macro vertex 0) */
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; /* point 0, component 0: macro vertex 0 moved one scaled step towards macro vertex 1 */
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; /* point 0, component 1 */
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; /* point 1, component 0: macro vertex 0 moved one scaled step towards macro vertex 2 */
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; /* point 1, component 1 */
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; /* point 2, component 0: point 0 shifted by the scaled (macro vertex 2 - macro vertex 0) difference */
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; /* point 2, component 1 */
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; /* jac_affine_r_c: r is the coordinate component; column 0 holds (point 1 - point 0) */
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; /* column 1 holds (point 2 - point 0), component 0 */
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; /* (point 1 - point 0), component 1 */
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; /* (point 2 - point 0), component 1 */
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; /* determinant of the 2x2 jac_affine matrix */
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); /* reciprocal determinant; NOTE(review): no zero check is visible in this span - confirm degenerate elements cannot reach here */
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; /* inverse of the 2x2 matrix via its adjugate: entry (0,0) = j11 / det */
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; /* entry (0,1) = -j01 / det */
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; /* entry (1,0) = -j10 / det */
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; /* entry (1,1) = j00 / det */
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); /* absolute value of the determinant; NOTE(review): unqualified abs on real_t - confirm a floating-point overload is in scope */
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_21*tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q];
+                const real_t tmp_qloop_30 = -tmp_qloop_26;
+                const real_t tmp_qloop_31 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q];
+                const real_t tmp_qloop_32 = tmp_qloop_30*tmp_qloop_31;
+                const real_t tmp_qloop_33 = tmp_qloop_29 + tmp_qloop_32;
+                const real_t tmp_qloop_34 = tmp_qloop_24*tmp_qloop_27;
+                const real_t tmp_qloop_35 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q];
+                const real_t tmp_qloop_36 = tmp_qloop_25*tmp_qloop_27;
+                const real_t tmp_qloop_37 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q];
+                const real_t tmp_qloop_38 = tmp_qloop_35*0.66666666666666667 + tmp_qloop_37*0.66666666666666667;
+                const real_t tmp_qloop_39 = tmp_qloop_27*tmp_qloop_30;
+                const real_t tmp_qloop_40 = tmp_qloop_28*1.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q] + tmp_qloop_39*1.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q];
+                const real_t tmp_qloop_41 = tmp_qloop_40*(tmp_qloop_29*2.0 + tmp_qloop_32*2.0);
+                const real_t tmp_qloop_42 = tmp_qloop_34*2.0*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q] + tmp_qloop_36*2.0*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q];
+                const real_t tmp_qloop_43 = tmp_qloop_42*(tmp_qloop_35*1.0 + tmp_qloop_37*1.0);
+                const real_t tmp_qloop_44 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q];
+                const real_t tmp_qloop_45 = tmp_qloop_24*tmp_qloop_31 + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_44;
+                const real_t tmp_qloop_46 = tmp_qloop_28*0.5;
+                const real_t tmp_qloop_47 = tmp_qloop_39*0.5;
+                const real_t tmp_qloop_48 = tmp_qloop_34*0.5*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q] + tmp_qloop_36*0.5*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q];
+                const real_t tmp_qloop_49 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_48*2.0;
+                const real_t tmp_qloop_50 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_51 = (tmp_qloop_50*tmp_qloop_50);
+                const real_t tmp_qloop_52 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_53 = (tmp_qloop_52*tmp_qloop_52);
+                const real_t tmp_qloop_54 = tmp_qloop_51 + tmp_qloop_53;
+                const real_t tmp_qloop_56 = pow(tmp_qloop_54, -0.50000000000000000)*tmp_qloop_55*1.0;
+                const real_t tmp_qloop_57 = tmp_qloop_50*tmp_qloop_56;
+                const real_t tmp_qloop_58 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_52) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_50);
+                const real_t tmp_qloop_59 = pow(tmp_qloop_54, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_60 = tmp_qloop_59*(radRayVertex + tmp_qloop_55*tmp_qloop_58);
+                const real_t tmp_qloop_61 = tmp_qloop_52*tmp_qloop_56;
+                const real_t tmp_qloop_62 = tmp_qloop_59*(radRayVertex + tmp_qloop_55*tmp_qloop_58);
+                const real_t tmp_qloop_63 = tmp_qloop_50*tmp_qloop_52;
+                const real_t tmp_qloop_64 = abs_det_jac_affine_BLUE*(mu_dof_0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*_data_phi_0_0_BLUE[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_57 - tmp_qloop_62*tmp_qloop_63)*(tmp_qloop_12*tmp_qloop_61 + tmp_qloop_60*tmp_qloop_63) - (tmp_qloop_0*tmp_qloop_61 + tmp_qloop_51*tmp_qloop_62)*(tmp_qloop_12*tmp_qloop_57 - tmp_qloop_53*tmp_qloop_60))*_data_q_w[q];
+                const real_t tmp_qloop_65 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1];
+                const real_t tmp_qloop_66 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1];
+                const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66;
+                const real_t tmp_qloop_68 = tmp_qloop_40*(tmp_qloop_65*2.0 + tmp_qloop_66*2.0);
+                const real_t tmp_qloop_69 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_44;
+                const real_t tmp_qloop_70 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2];
+                const real_t tmp_qloop_71 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2];
+                const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71;
+                const real_t tmp_qloop_73 = tmp_qloop_40*(tmp_qloop_70*2.0 + tmp_qloop_71*2.0);
+                const real_t tmp_qloop_74 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_44;
+                const real_t tmp_qloop_75 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3];
+                const real_t tmp_qloop_76 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3];
+                const real_t tmp_qloop_77 = tmp_qloop_75 + tmp_qloop_76;
+                const real_t tmp_qloop_78 = tmp_qloop_40*(tmp_qloop_75*2.0 + tmp_qloop_76*2.0);
+                const real_t tmp_qloop_79 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_44;
+                const real_t tmp_qloop_80 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4];
+                const real_t tmp_qloop_81 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4];
+                const real_t tmp_qloop_82 = tmp_qloop_80 + tmp_qloop_81;
+                const real_t tmp_qloop_83 = tmp_qloop_40*(tmp_qloop_80*2.0 + tmp_qloop_81*2.0);
+                const real_t tmp_qloop_84 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_44;
+                const real_t tmp_qloop_85 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_86 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_87 = tmp_qloop_85 + tmp_qloop_86;
+                const real_t tmp_qloop_88 = tmp_qloop_40*(tmp_qloop_85*2.0 + tmp_qloop_86*2.0);
+                const real_t tmp_qloop_89 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_44;
+                const real_t tmp_qloop_90 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1];
+                const real_t tmp_qloop_91 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1];
+                const real_t tmp_qloop_92 = tmp_qloop_90*0.66666666666666667 + tmp_qloop_91*0.66666666666666667;
+                const real_t tmp_qloop_93 = tmp_qloop_42*(tmp_qloop_90*1.0 + tmp_qloop_91*1.0);
+                const real_t tmp_qloop_94 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_48*2.0;
+                const real_t tmp_qloop_95 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2];
+                const real_t tmp_qloop_96 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2];
+                const real_t tmp_qloop_97 = tmp_qloop_95*0.66666666666666667 + tmp_qloop_96*0.66666666666666667;
+                const real_t tmp_qloop_98 = tmp_qloop_42*(tmp_qloop_95*1.0 + tmp_qloop_96*1.0);
+                const real_t tmp_qloop_99 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_48*2.0;
+                const real_t tmp_qloop_100 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3];
+                const real_t tmp_qloop_101 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3];
+                const real_t tmp_qloop_102 = tmp_qloop_100*0.66666666666666667 + tmp_qloop_101*0.66666666666666667;
+                const real_t tmp_qloop_103 = tmp_qloop_42*(tmp_qloop_100*1.0 + tmp_qloop_101*1.0);
+                const real_t tmp_qloop_104 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_48*2.0;
+                const real_t tmp_qloop_105 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4];
+                const real_t tmp_qloop_106 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4];
+                const real_t tmp_qloop_107 = tmp_qloop_105*0.66666666666666667 + tmp_qloop_106*0.66666666666666667;
+                const real_t tmp_qloop_108 = tmp_qloop_42*(tmp_qloop_105*1.0 + tmp_qloop_106*1.0);
+                const real_t tmp_qloop_109 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_48*2.0;
+                const real_t tmp_qloop_110 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5];
+                const real_t tmp_qloop_111 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5];
+                const real_t tmp_qloop_112 = tmp_qloop_110*0.66666666666666667 + tmp_qloop_111*0.66666666666666667;
+                const real_t tmp_qloop_113 = tmp_qloop_42*(tmp_qloop_110*1.0 + tmp_qloop_111*1.0);
+                const real_t tmp_qloop_114 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_48*2.0;
+                const real_t q_tmp_0_0 = tmp_qloop_64*(-tmp_qloop_33*tmp_qloop_38 + tmp_qloop_41 + tmp_qloop_43 + tmp_qloop_45*tmp_qloop_49);
+                const real_t q_tmp_0_1 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_67 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_0_2 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_72 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_74 + tmp_qloop_73);
+                const real_t q_tmp_0_3 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_77 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_0_4 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_82 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_84 + tmp_qloop_83);
+                const real_t q_tmp_0_5 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_87 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_89 + tmp_qloop_88);
+                const real_t q_tmp_1_0 = tmp_qloop_64*(-tmp_qloop_33*tmp_qloop_92 + tmp_qloop_41 + tmp_qloop_45*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_1_1 = tmp_qloop_64*(-tmp_qloop_67*tmp_qloop_92 + tmp_qloop_68 + tmp_qloop_69*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_1_2 = tmp_qloop_64*(-tmp_qloop_72*tmp_qloop_92 + tmp_qloop_73 + tmp_qloop_74*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_1_3 = tmp_qloop_64*(-tmp_qloop_77*tmp_qloop_92 + tmp_qloop_78 + tmp_qloop_79*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_1_4 = tmp_qloop_64*(-tmp_qloop_82*tmp_qloop_92 + tmp_qloop_83 + tmp_qloop_84*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_1_5 = tmp_qloop_64*(-tmp_qloop_87*tmp_qloop_92 + tmp_qloop_88 + tmp_qloop_89*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_2_0 = tmp_qloop_64*(-tmp_qloop_33*tmp_qloop_97 + tmp_qloop_41 + tmp_qloop_45*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_2_1 = tmp_qloop_64*(-tmp_qloop_67*tmp_qloop_97 + tmp_qloop_68 + tmp_qloop_69*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_2_2 = tmp_qloop_64*(-tmp_qloop_72*tmp_qloop_97 + tmp_qloop_73 + tmp_qloop_74*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_2_3 = tmp_qloop_64*(-tmp_qloop_77*tmp_qloop_97 + tmp_qloop_78 + tmp_qloop_79*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_2_4 = tmp_qloop_64*(-tmp_qloop_82*tmp_qloop_97 + tmp_qloop_83 + tmp_qloop_84*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_2_5 = tmp_qloop_64*(-tmp_qloop_87*tmp_qloop_97 + tmp_qloop_88 + tmp_qloop_89*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_3_0 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_33 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_45 + tmp_qloop_41);
+                const real_t q_tmp_3_1 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_67 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_3_2 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_72 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_74 + tmp_qloop_73);
+                const real_t q_tmp_3_3 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_77 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_4 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_82 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_84 + tmp_qloop_83);
+                const real_t q_tmp_3_5 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_87 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_89 + tmp_qloop_88);
+                const real_t q_tmp_4_0 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_33 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_45 + tmp_qloop_41);
+                const real_t q_tmp_4_1 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_67 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_4_2 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_72 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_74 + tmp_qloop_73);
+                const real_t q_tmp_4_3 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_77 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_4_4 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_82 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_84 + tmp_qloop_83);
+                const real_t q_tmp_4_5 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_87 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_89 + tmp_qloop_88);
+                const real_t q_tmp_5_0 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_33 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_45 + tmp_qloop_41);
+                const real_t q_tmp_5_1 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_67 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_5_2 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_72 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_74 + tmp_qloop_73);
+                const real_t q_tmp_5_3 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_77 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_5_4 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_82 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_84 + tmp_qloop_83);
+                const real_t q_tmp_5_5 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_87 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_89 + tmp_qloop_88);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..67a007621f60a6926069176dbf0f3cc9bb6a26a3
--- /dev/null
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_macro_2D.cpp
@@ -0,0 +1,849 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseFullStokesAnnulusMap_1_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseFullStokesAnnulusMap_1_0::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (tmp_qloop_1*tmp_qloop_14 - tmp_qloop_11*tmp_qloop_13);
+       const real_t tmp_qloop_55 = tmp_qloop_15*1.0 / (-tmp_qloop_0*tmp_qloop_14 + tmp_qloop_11*tmp_qloop_12);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_21*tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q];
+                const real_t tmp_qloop_30 = -tmp_qloop_26;
+                const real_t tmp_qloop_31 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q];
+                const real_t tmp_qloop_32 = tmp_qloop_30*tmp_qloop_31;
+                const real_t tmp_qloop_33 = tmp_qloop_29 + tmp_qloop_32;
+                const real_t tmp_qloop_34 = tmp_qloop_24*tmp_qloop_27;
+                const real_t tmp_qloop_35 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q];
+                const real_t tmp_qloop_36 = tmp_qloop_25*tmp_qloop_27;
+                const real_t tmp_qloop_37 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q];
+                const real_t tmp_qloop_38 = tmp_qloop_35*0.66666666666666667 + tmp_qloop_37*0.66666666666666667;
+                const real_t tmp_qloop_39 = tmp_qloop_27*tmp_qloop_30;
+                const real_t tmp_qloop_40 = tmp_qloop_28*1.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q] + tmp_qloop_39*1.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q];
+                const real_t tmp_qloop_41 = tmp_qloop_40*(tmp_qloop_29*2.0 + tmp_qloop_32*2.0);
+                const real_t tmp_qloop_42 = tmp_qloop_34*2.0*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q] + tmp_qloop_36*2.0*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q];
+                const real_t tmp_qloop_43 = tmp_qloop_42*(tmp_qloop_35*1.0 + tmp_qloop_37*1.0);
+                const real_t tmp_qloop_44 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[q];
+                const real_t tmp_qloop_45 = tmp_qloop_24*tmp_qloop_31 + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q] + tmp_qloop_44;
+                const real_t tmp_qloop_46 = tmp_qloop_28*0.5;
+                const real_t tmp_qloop_47 = tmp_qloop_39*0.5;
+                const real_t tmp_qloop_48 = tmp_qloop_34*0.5*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q] + tmp_qloop_36*0.5*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q];
+                const real_t tmp_qloop_49 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_48*2.0;
+                const real_t tmp_qloop_50 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_51 = (tmp_qloop_50*tmp_qloop_50);
+                const real_t tmp_qloop_52 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_53 = (tmp_qloop_52*tmp_qloop_52);
+                const real_t tmp_qloop_54 = tmp_qloop_51 + tmp_qloop_53;
+                const real_t tmp_qloop_56 = pow(tmp_qloop_54, -0.50000000000000000)*tmp_qloop_55*1.0;
+                const real_t tmp_qloop_57 = tmp_qloop_50*tmp_qloop_56;
+                const real_t tmp_qloop_58 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_52) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_50);
+                const real_t tmp_qloop_59 = pow(tmp_qloop_54, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_60 = tmp_qloop_59*(radRayVertex + tmp_qloop_55*tmp_qloop_58);
+                const real_t tmp_qloop_61 = tmp_qloop_52*tmp_qloop_56;
+                const real_t tmp_qloop_62 = tmp_qloop_59*(radRayVertex + tmp_qloop_55*tmp_qloop_58);
+                const real_t tmp_qloop_63 = tmp_qloop_50*tmp_qloop_52;
+                const real_t tmp_qloop_64 = abs_det_jac_affine_GRAY*(mu_dof_0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*_data_phi_0_0_GRAY[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_57 - tmp_qloop_62*tmp_qloop_63)*(tmp_qloop_12*tmp_qloop_61 + tmp_qloop_60*tmp_qloop_63) - (tmp_qloop_0*tmp_qloop_61 + tmp_qloop_51*tmp_qloop_62)*(tmp_qloop_12*tmp_qloop_57 - tmp_qloop_53*tmp_qloop_60))*_data_q_w[q];
+                const real_t tmp_qloop_65 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1];
+                const real_t tmp_qloop_66 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1];
+                const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66;
+                const real_t tmp_qloop_68 = tmp_qloop_40*(tmp_qloop_65*2.0 + tmp_qloop_66*2.0);
+                const real_t tmp_qloop_69 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 1] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 1] + tmp_qloop_44;
+                const real_t tmp_qloop_70 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2];
+                const real_t tmp_qloop_71 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2];
+                const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71;
+                const real_t tmp_qloop_73 = tmp_qloop_40*(tmp_qloop_70*2.0 + tmp_qloop_71*2.0);
+                const real_t tmp_qloop_74 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 2] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 2] + tmp_qloop_44;
+                const real_t tmp_qloop_75 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3];
+                const real_t tmp_qloop_76 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3];
+                const real_t tmp_qloop_77 = tmp_qloop_75 + tmp_qloop_76;
+                const real_t tmp_qloop_78 = tmp_qloop_40*(tmp_qloop_75*2.0 + tmp_qloop_76*2.0);
+                const real_t tmp_qloop_79 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 3] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 3] + tmp_qloop_44;
+                const real_t tmp_qloop_80 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4];
+                const real_t tmp_qloop_81 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4];
+                const real_t tmp_qloop_82 = tmp_qloop_80 + tmp_qloop_81;
+                const real_t tmp_qloop_83 = tmp_qloop_40*(tmp_qloop_80*2.0 + tmp_qloop_81*2.0);
+                const real_t tmp_qloop_84 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 4] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 4] + tmp_qloop_44;
+                const real_t tmp_qloop_85 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_86 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5];
+                const real_t tmp_qloop_87 = tmp_qloop_85 + tmp_qloop_86;
+                const real_t tmp_qloop_88 = tmp_qloop_40*(tmp_qloop_85*2.0 + tmp_qloop_86*2.0);
+                const real_t tmp_qloop_89 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[6*q + 5] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[6*q + 5] + tmp_qloop_44;
+                const real_t tmp_qloop_90 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1];
+                const real_t tmp_qloop_91 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1];
+                const real_t tmp_qloop_92 = tmp_qloop_90*0.66666666666666667 + tmp_qloop_91*0.66666666666666667;
+                const real_t tmp_qloop_93 = tmp_qloop_42*(tmp_qloop_90*1.0 + tmp_qloop_91*1.0);
+                const real_t tmp_qloop_94 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_48*2.0;
+                const real_t tmp_qloop_95 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2];
+                const real_t tmp_qloop_96 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2];
+                const real_t tmp_qloop_97 = tmp_qloop_95*0.66666666666666667 + tmp_qloop_96*0.66666666666666667;
+                const real_t tmp_qloop_98 = tmp_qloop_42*(tmp_qloop_95*1.0 + tmp_qloop_96*1.0);
+                const real_t tmp_qloop_99 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_48*2.0;
+                const real_t tmp_qloop_100 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3];
+                const real_t tmp_qloop_101 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3];
+                const real_t tmp_qloop_102 = tmp_qloop_100*0.66666666666666667 + tmp_qloop_101*0.66666666666666667;
+                const real_t tmp_qloop_103 = tmp_qloop_42*(tmp_qloop_100*1.0 + tmp_qloop_101*1.0);
+                const real_t tmp_qloop_104 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_48*2.0;
+                const real_t tmp_qloop_105 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4];
+                const real_t tmp_qloop_106 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4];
+                const real_t tmp_qloop_107 = tmp_qloop_105*0.66666666666666667 + tmp_qloop_106*0.66666666666666667;
+                const real_t tmp_qloop_108 = tmp_qloop_42*(tmp_qloop_105*1.0 + tmp_qloop_106*1.0);
+                const real_t tmp_qloop_109 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_48*2.0;
+                const real_t tmp_qloop_110 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5];
+                const real_t tmp_qloop_111 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5];
+                const real_t tmp_qloop_112 = tmp_qloop_110*0.66666666666666667 + tmp_qloop_111*0.66666666666666667;
+                const real_t tmp_qloop_113 = tmp_qloop_42*(tmp_qloop_110*1.0 + tmp_qloop_111*1.0);
+                const real_t tmp_qloop_114 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_48*2.0;
+                const real_t q_tmp_0_0 = tmp_qloop_64*(-tmp_qloop_33*tmp_qloop_38 + tmp_qloop_41 + tmp_qloop_43 + tmp_qloop_45*tmp_qloop_49);
+                const real_t q_tmp_0_1 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_67 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_0_2 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_72 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_74 + tmp_qloop_73);
+                const real_t q_tmp_0_3 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_77 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_0_4 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_82 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_84 + tmp_qloop_83);
+                const real_t q_tmp_0_5 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_87 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_89 + tmp_qloop_88);
+                const real_t q_tmp_1_0 = tmp_qloop_64*(-tmp_qloop_33*tmp_qloop_92 + tmp_qloop_41 + tmp_qloop_45*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_1_1 = tmp_qloop_64*(-tmp_qloop_67*tmp_qloop_92 + tmp_qloop_68 + tmp_qloop_69*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_1_2 = tmp_qloop_64*(-tmp_qloop_72*tmp_qloop_92 + tmp_qloop_73 + tmp_qloop_74*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_1_3 = tmp_qloop_64*(-tmp_qloop_77*tmp_qloop_92 + tmp_qloop_78 + tmp_qloop_79*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_1_4 = tmp_qloop_64*(-tmp_qloop_82*tmp_qloop_92 + tmp_qloop_83 + tmp_qloop_84*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_1_5 = tmp_qloop_64*(-tmp_qloop_87*tmp_qloop_92 + tmp_qloop_88 + tmp_qloop_89*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_2_0 = tmp_qloop_64*(-tmp_qloop_33*tmp_qloop_97 + tmp_qloop_41 + tmp_qloop_45*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_2_1 = tmp_qloop_64*(-tmp_qloop_67*tmp_qloop_97 + tmp_qloop_68 + tmp_qloop_69*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_2_2 = tmp_qloop_64*(-tmp_qloop_72*tmp_qloop_97 + tmp_qloop_73 + tmp_qloop_74*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_2_3 = tmp_qloop_64*(-tmp_qloop_77*tmp_qloop_97 + tmp_qloop_78 + tmp_qloop_79*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_2_4 = tmp_qloop_64*(-tmp_qloop_82*tmp_qloop_97 + tmp_qloop_83 + tmp_qloop_84*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_2_5 = tmp_qloop_64*(-tmp_qloop_87*tmp_qloop_97 + tmp_qloop_88 + tmp_qloop_89*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_3_0 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_33 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_45 + tmp_qloop_41);
+                const real_t q_tmp_3_1 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_67 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_3_2 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_72 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_74 + tmp_qloop_73);
+                const real_t q_tmp_3_3 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_77 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_4 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_82 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_84 + tmp_qloop_83);
+                const real_t q_tmp_3_5 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_87 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_89 + tmp_qloop_88);
+                const real_t q_tmp_4_0 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_33 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_45 + tmp_qloop_41);
+                const real_t q_tmp_4_1 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_67 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_4_2 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_72 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_74 + tmp_qloop_73);
+                const real_t q_tmp_4_3 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_77 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_4_4 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_82 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_84 + tmp_qloop_83);
+                const real_t q_tmp_4_5 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_87 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_89 + tmp_qloop_88);
+                const real_t q_tmp_5_0 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_33 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_45 + tmp_qloop_41);
+                const real_t q_tmp_5_1 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_67 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_5_2 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_72 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_74 + tmp_qloop_73);
+                const real_t q_tmp_5_3 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_77 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_5_4 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_82 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_84 + tmp_qloop_83);
+                const real_t q_tmp_5_5 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_87 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_89 + tmp_qloop_88);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_1_0;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_2_0;
+             const real_t elMat_2_1 = q_acc_2_1;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_3_0;
+             const real_t elMat_3_1 = q_acc_3_1;
+             const real_t elMat_3_2 = q_acc_3_2;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_4_0;
+             const real_t elMat_4_1 = q_acc_4_1;
+             const real_t elMat_4_2 = q_acc_4_2;
+             const real_t elMat_4_3 = q_acc_4_3;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_5_0;
+             const real_t elMat_5_1 = q_acc_5_1;
+             const real_t elMat_5_2 = q_acc_5_2;
+             const real_t elMat_5_3 = q_acc_5_3;
+             const real_t elMat_5_4 = q_acc_5_4;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // BLUE geometry setup: reciprocal of micro_edges_per_macro_edge_float, used as the step factor for all macro-vertex differences below
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); // component 0 of: macro vertex 0 + one step towards macro vertex 1
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); // component 1 of the same point
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); // component 0 of one step from macro vertex 0 towards macro vertex 2 (offset only)
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); // component 1 of that offset
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; // point 0 = vertex 0 + step towards vertex 1
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; // point 1 = vertex 0 + step towards vertex 2
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; // point 2 = vertex 0 + both steps
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; // Jacobian entry (i,j) = component i of (point j+1 - point 0)
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; // determinant of the 2x2 Jacobian
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); // reciprocal determinant; there is no guard here against a zero determinant
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; // inverse Jacobian via the 2x2 adjugate scaled by 1/det
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); // NOTE(review): unqualified abs on a real_t - confirm a floating-point overload is in scope here, otherwise the integer abs would truncate
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_0 = 0.0;
+             real_t q_acc_2_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_0 = 0.0;
+             real_t q_acc_3_1 = 0.0;
+             real_t q_acc_3_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_0 = 0.0;
+             real_t q_acc_4_1 = 0.0;
+             real_t q_acc_4_2 = 0.0;
+             real_t q_acc_4_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_0 = 0.0;
+             real_t q_acc_5_1 = 0.0;
+             real_t q_acc_5_2 = 0.0;
+             real_t q_acc_5_3 = 0.0;
+             real_t q_acc_5_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_3 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_qloop_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_6 = p_affine_0_0 - p_affine_2_0;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_1 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_7 = p_affine_0_0 - tmp_qloop_5*_data_q_p_0[q] - tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_8 = (tmp_qloop_7*tmp_qloop_7);
+                const real_t tmp_qloop_9 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_10 = tmp_qloop_8 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(-tmp_qloop_1*(-rayVertex_1 + tmp_qloop_4) + tmp_qloop_13*(-rayVertex_0 + tmp_qloop_7));
+                const real_t tmp_qloop_21 = -tmp_qloop_1*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_17*tmp_qloop_7;
+                const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_24 = tmp_qloop_13*tmp_qloop_22 + tmp_qloop_23*tmp_qloop_9;
+                const real_t tmp_qloop_25 = tmp_qloop_1*tmp_qloop_22 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_13*tmp_qloop_18 - tmp_qloop_23*tmp_qloop_4*tmp_qloop_7;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_21*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_21*tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q];
+                const real_t tmp_qloop_30 = -tmp_qloop_26;
+                const real_t tmp_qloop_31 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q];
+                const real_t tmp_qloop_32 = tmp_qloop_30*tmp_qloop_31;
+                const real_t tmp_qloop_33 = tmp_qloop_29 + tmp_qloop_32;
+                const real_t tmp_qloop_34 = tmp_qloop_24*tmp_qloop_27;
+                const real_t tmp_qloop_35 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q];
+                const real_t tmp_qloop_36 = tmp_qloop_25*tmp_qloop_27;
+                const real_t tmp_qloop_37 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q];
+                const real_t tmp_qloop_38 = tmp_qloop_35*0.66666666666666667 + tmp_qloop_37*0.66666666666666667;
+                const real_t tmp_qloop_39 = tmp_qloop_27*tmp_qloop_30;
+                const real_t tmp_qloop_40 = tmp_qloop_28*1.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q] + tmp_qloop_39*1.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q];
+                const real_t tmp_qloop_41 = tmp_qloop_40*(tmp_qloop_29*2.0 + tmp_qloop_32*2.0);
+                const real_t tmp_qloop_42 = tmp_qloop_34*2.0*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q] + tmp_qloop_36*2.0*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q];
+                const real_t tmp_qloop_43 = tmp_qloop_42*(tmp_qloop_35*1.0 + tmp_qloop_37*1.0);
+                const real_t tmp_qloop_44 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[q] + tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[q];
+                const real_t tmp_qloop_45 = tmp_qloop_24*tmp_qloop_31 + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q] + tmp_qloop_44;
+                const real_t tmp_qloop_46 = tmp_qloop_28*0.5;
+                const real_t tmp_qloop_47 = tmp_qloop_39*0.5;
+                const real_t tmp_qloop_48 = tmp_qloop_34*0.5*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q] + tmp_qloop_36*0.5*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q];
+                const real_t tmp_qloop_49 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_48*2.0;
+                const real_t tmp_qloop_50 = -p_affine_0_0 + tmp_qloop_5*_data_q_p_0[q] + tmp_qloop_6*_data_q_p_1[q];
+                const real_t tmp_qloop_51 = (tmp_qloop_50*tmp_qloop_50);
+                const real_t tmp_qloop_52 = -p_affine_0_1 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_53 = (tmp_qloop_52*tmp_qloop_52);
+                const real_t tmp_qloop_54 = tmp_qloop_51 + tmp_qloop_53;
+                const real_t tmp_qloop_56 = pow(tmp_qloop_54, -0.50000000000000000)*tmp_qloop_55*1.0;
+                const real_t tmp_qloop_57 = tmp_qloop_50*tmp_qloop_56;
+                const real_t tmp_qloop_58 = tmp_qloop_0*(rayVertex_1 + tmp_qloop_52) - tmp_qloop_12*(rayVertex_0 + tmp_qloop_50);
+                const real_t tmp_qloop_59 = pow(tmp_qloop_54, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_60 = tmp_qloop_59*(radRayVertex + tmp_qloop_55*tmp_qloop_58);
+                const real_t tmp_qloop_61 = tmp_qloop_52*tmp_qloop_56;
+                const real_t tmp_qloop_62 = tmp_qloop_59*(radRayVertex + tmp_qloop_55*tmp_qloop_58);
+                const real_t tmp_qloop_63 = tmp_qloop_50*tmp_qloop_52;
+                const real_t tmp_qloop_64 = abs_det_jac_affine_BLUE*(mu_dof_0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*_data_phi_0_0_BLUE[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_57 - tmp_qloop_62*tmp_qloop_63)*(tmp_qloop_12*tmp_qloop_61 + tmp_qloop_60*tmp_qloop_63) - (tmp_qloop_0*tmp_qloop_61 + tmp_qloop_51*tmp_qloop_62)*(tmp_qloop_12*tmp_qloop_57 - tmp_qloop_53*tmp_qloop_60))*_data_q_w[q];
+                const real_t tmp_qloop_65 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1];
+                const real_t tmp_qloop_66 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1];
+                const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66;
+                const real_t tmp_qloop_68 = tmp_qloop_40*(tmp_qloop_65*2.0 + tmp_qloop_66*2.0);
+                const real_t tmp_qloop_69 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 1] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 1] + tmp_qloop_44;
+                const real_t tmp_qloop_70 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2];
+                const real_t tmp_qloop_71 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2];
+                const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71;
+                const real_t tmp_qloop_73 = tmp_qloop_40*(tmp_qloop_70*2.0 + tmp_qloop_71*2.0);
+                const real_t tmp_qloop_74 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 2] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 2] + tmp_qloop_44;
+                const real_t tmp_qloop_75 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3];
+                const real_t tmp_qloop_76 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3];
+                const real_t tmp_qloop_77 = tmp_qloop_75 + tmp_qloop_76;
+                const real_t tmp_qloop_78 = tmp_qloop_40*(tmp_qloop_75*2.0 + tmp_qloop_76*2.0);
+                const real_t tmp_qloop_79 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 3] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 3] + tmp_qloop_44;
+                const real_t tmp_qloop_80 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4];
+                const real_t tmp_qloop_81 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4];
+                const real_t tmp_qloop_82 = tmp_qloop_80 + tmp_qloop_81;
+                const real_t tmp_qloop_83 = tmp_qloop_40*(tmp_qloop_80*2.0 + tmp_qloop_81*2.0);
+                const real_t tmp_qloop_84 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 4] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 4] + tmp_qloop_44;
+                const real_t tmp_qloop_85 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_86 = tmp_qloop_39*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5];
+                const real_t tmp_qloop_87 = tmp_qloop_85 + tmp_qloop_86;
+                const real_t tmp_qloop_88 = tmp_qloop_40*(tmp_qloop_85*2.0 + tmp_qloop_86*2.0);
+                const real_t tmp_qloop_89 = tmp_qloop_34*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[6*q + 5] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[6*q + 5] + tmp_qloop_44;
+                const real_t tmp_qloop_90 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1];
+                const real_t tmp_qloop_91 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1];
+                const real_t tmp_qloop_92 = tmp_qloop_90*0.66666666666666667 + tmp_qloop_91*0.66666666666666667;
+                const real_t tmp_qloop_93 = tmp_qloop_42*(tmp_qloop_90*1.0 + tmp_qloop_91*1.0);
+                const real_t tmp_qloop_94 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_48*2.0;
+                const real_t tmp_qloop_95 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2];
+                const real_t tmp_qloop_96 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2];
+                const real_t tmp_qloop_97 = tmp_qloop_95*0.66666666666666667 + tmp_qloop_96*0.66666666666666667;
+                const real_t tmp_qloop_98 = tmp_qloop_42*(tmp_qloop_95*1.0 + tmp_qloop_96*1.0);
+                const real_t tmp_qloop_99 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_48*2.0;
+                const real_t tmp_qloop_100 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3];
+                const real_t tmp_qloop_101 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3];
+                const real_t tmp_qloop_102 = tmp_qloop_100*0.66666666666666667 + tmp_qloop_101*0.66666666666666667;
+                const real_t tmp_qloop_103 = tmp_qloop_42*(tmp_qloop_100*1.0 + tmp_qloop_101*1.0);
+                const real_t tmp_qloop_104 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_48*2.0;
+                const real_t tmp_qloop_105 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4];
+                const real_t tmp_qloop_106 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4];
+                const real_t tmp_qloop_107 = tmp_qloop_105*0.66666666666666667 + tmp_qloop_106*0.66666666666666667;
+                const real_t tmp_qloop_108 = tmp_qloop_42*(tmp_qloop_105*1.0 + tmp_qloop_106*1.0);
+                const real_t tmp_qloop_109 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_48*2.0;
+                const real_t tmp_qloop_110 = tmp_qloop_34*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5];
+                const real_t tmp_qloop_111 = tmp_qloop_36*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5];
+                const real_t tmp_qloop_112 = tmp_qloop_110*0.66666666666666667 + tmp_qloop_111*0.66666666666666667;
+                const real_t tmp_qloop_113 = tmp_qloop_42*(tmp_qloop_110*1.0 + tmp_qloop_111*1.0);
+                const real_t tmp_qloop_114 = tmp_qloop_46*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5] + tmp_qloop_47*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_48*2.0;
+                const real_t q_tmp_0_0 = tmp_qloop_64*(-tmp_qloop_33*tmp_qloop_38 + tmp_qloop_41 + tmp_qloop_43 + tmp_qloop_45*tmp_qloop_49);
+                const real_t q_tmp_0_1 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_67 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_0_2 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_72 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_74 + tmp_qloop_73);
+                const real_t q_tmp_0_3 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_77 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_0_4 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_82 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_84 + tmp_qloop_83);
+                const real_t q_tmp_0_5 = tmp_qloop_64*(-tmp_qloop_38*tmp_qloop_87 + tmp_qloop_43 + tmp_qloop_49*tmp_qloop_89 + tmp_qloop_88);
+                const real_t q_tmp_1_0 = tmp_qloop_64*(-tmp_qloop_33*tmp_qloop_92 + tmp_qloop_41 + tmp_qloop_45*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_1_1 = tmp_qloop_64*(-tmp_qloop_67*tmp_qloop_92 + tmp_qloop_68 + tmp_qloop_69*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_1_2 = tmp_qloop_64*(-tmp_qloop_72*tmp_qloop_92 + tmp_qloop_73 + tmp_qloop_74*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_1_3 = tmp_qloop_64*(-tmp_qloop_77*tmp_qloop_92 + tmp_qloop_78 + tmp_qloop_79*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_1_4 = tmp_qloop_64*(-tmp_qloop_82*tmp_qloop_92 + tmp_qloop_83 + tmp_qloop_84*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_1_5 = tmp_qloop_64*(-tmp_qloop_87*tmp_qloop_92 + tmp_qloop_88 + tmp_qloop_89*tmp_qloop_94 + tmp_qloop_93);
+                const real_t q_tmp_2_0 = tmp_qloop_64*(-tmp_qloop_33*tmp_qloop_97 + tmp_qloop_41 + tmp_qloop_45*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_2_1 = tmp_qloop_64*(-tmp_qloop_67*tmp_qloop_97 + tmp_qloop_68 + tmp_qloop_69*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_2_2 = tmp_qloop_64*(-tmp_qloop_72*tmp_qloop_97 + tmp_qloop_73 + tmp_qloop_74*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_2_3 = tmp_qloop_64*(-tmp_qloop_77*tmp_qloop_97 + tmp_qloop_78 + tmp_qloop_79*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_2_4 = tmp_qloop_64*(-tmp_qloop_82*tmp_qloop_97 + tmp_qloop_83 + tmp_qloop_84*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_2_5 = tmp_qloop_64*(-tmp_qloop_87*tmp_qloop_97 + tmp_qloop_88 + tmp_qloop_89*tmp_qloop_99 + tmp_qloop_98);
+                const real_t q_tmp_3_0 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_33 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_45 + tmp_qloop_41);
+                const real_t q_tmp_3_1 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_67 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_3_2 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_72 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_74 + tmp_qloop_73);
+                const real_t q_tmp_3_3 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_77 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_3_4 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_82 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_84 + tmp_qloop_83);
+                const real_t q_tmp_3_5 = tmp_qloop_64*(-tmp_qloop_102*tmp_qloop_87 + tmp_qloop_103 + tmp_qloop_104*tmp_qloop_89 + tmp_qloop_88);
+                const real_t q_tmp_4_0 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_33 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_45 + tmp_qloop_41);
+                const real_t q_tmp_4_1 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_67 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_4_2 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_72 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_74 + tmp_qloop_73);
+                const real_t q_tmp_4_3 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_77 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_4_4 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_82 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_84 + tmp_qloop_83);
+                const real_t q_tmp_4_5 = tmp_qloop_64*(-tmp_qloop_107*tmp_qloop_87 + tmp_qloop_108 + tmp_qloop_109*tmp_qloop_89 + tmp_qloop_88);
+                const real_t q_tmp_5_0 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_33 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_45 + tmp_qloop_41);
+                const real_t q_tmp_5_1 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_67 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_69 + tmp_qloop_68);
+                const real_t q_tmp_5_2 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_72 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_74 + tmp_qloop_73);
+                const real_t q_tmp_5_3 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_77 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_79 + tmp_qloop_78);
+                const real_t q_tmp_5_4 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_82 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_84 + tmp_qloop_83);
+                const real_t q_tmp_5_5 = tmp_qloop_64*(-tmp_qloop_112*tmp_qloop_87 + tmp_qloop_113 + tmp_qloop_114*tmp_qloop_89 + tmp_qloop_88);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_0 = q_acc_1_0 + q_tmp_1_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_0 = q_acc_2_0 + q_tmp_2_0;
+                q_acc_2_1 = q_acc_2_1 + q_tmp_2_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_0 = q_acc_3_0 + q_tmp_3_0;
+                q_acc_3_1 = q_acc_3_1 + q_tmp_3_1;
+                q_acc_3_2 = q_acc_3_2 + q_tmp_3_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_0 = q_acc_4_0 + q_tmp_4_0;
+                q_acc_4_1 = q_acc_4_1 + q_tmp_4_1;
+                q_acc_4_2 = q_acc_4_2 + q_tmp_4_2;
+                q_acc_4_3 = q_acc_4_3 + q_tmp_4_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_0 = q_acc_5_0 + q_tmp_5_0;
+                q_acc_5_1 = q_acc_5_1 + q_tmp_5_1;
+                q_acc_5_2 = q_acc_5_2 + q_tmp_5_2;
+                q_acc_5_3 = q_acc_5_3 + q_tmp_5_3;
+                q_acc_5_4 = q_acc_5_4 + q_tmp_5_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_1_0;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_2_0;
+             const real_t elMat_2_1 = q_acc_2_1;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_3_0;
+             const real_t elMat_3_1 = q_acc_3_1;
+             const real_t elMat_3_2 = q_acc_3_2;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_4_0;
+             const real_t elMat_4_1 = q_acc_4_1;
+             const real_t elMat_4_2 = q_acc_4_2;
+             const real_t elMat_4_3 = q_acc_4_3;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_5_0;
+             const real_t elMat_5_1 = q_acc_5_1;
+             const real_t elMat_5_2 = q_acc_5_2;
+             const real_t elMat_5_3 = q_acc_5_3;
+             const real_t elMat_5_4 = q_acc_5_4;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a5f1bc99b07b2195e7708d1574ac344a22f57b88
--- /dev/null
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp
@@ -0,0 +1,587 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseFullStokesAnnulusMap_1_1.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseFullStokesAnnulusMap_1_1::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t * RESTRICT  _data_srcEdge, real_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (-tmp_qloop_1*tmp_qloop_11 + tmp_qloop_13*tmp_qloop_14);
+       const real_t tmp_qloop_50 = tmp_qloop_15*1.0 / (tmp_qloop_0*tmp_qloop_11 - tmp_qloop_12*tmp_qloop_14);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_3 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_qloop_6 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_7 = p_affine_0_1 - p_affine_2_1;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_0 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_5 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_8 = p_affine_0_1 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                const real_t tmp_qloop_10 = tmp_qloop_5 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_4) - tmp_qloop_13*(-rayVertex_1 + tmp_qloop_8));
+                const real_t tmp_qloop_21 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_1*tmp_qloop_18 + tmp_qloop_21*tmp_qloop_9;
+                const real_t tmp_qloop_23 = tmp_qloop_17*tmp_qloop_8;
+                const real_t tmp_qloop_24 = -tmp_qloop_13*tmp_qloop_23 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_5*1.0;
+                const real_t tmp_qloop_25 = tmp_qloop_13*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_1*tmp_qloop_23 - tmp_qloop_21*tmp_qloop_4*tmp_qloop_8;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_22*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_22*tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q];
+                const real_t tmp_qloop_30 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q];
+                const real_t tmp_qloop_31 = tmp_qloop_25*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q];
+                const real_t tmp_qloop_33 = tmp_qloop_25*tmp_qloop_27;
+                const real_t tmp_qloop_34 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q];
+                const real_t tmp_qloop_35 = tmp_qloop_32*0.66666666666666667 + tmp_qloop_34*0.66666666666666667;
+                const real_t tmp_qloop_36 = tmp_qloop_24*tmp_qloop_27;
+                const real_t tmp_qloop_37 = -tmp_qloop_26*tmp_qloop_27;
+                const real_t tmp_qloop_38 = (tmp_qloop_36*1.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q] + tmp_qloop_37*1.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q])*(tmp_qloop_36*2.0*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q] + tmp_qloop_37*2.0*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q]);
+                const real_t tmp_qloop_39 = tmp_qloop_32*1.0 + tmp_qloop_34*1.0;
+                const real_t tmp_qloop_40 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q] + tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q];
+                const real_t tmp_qloop_41 = tmp_qloop_36*0.5;
+                const real_t tmp_qloop_42 = tmp_qloop_37*0.5;
+                const real_t tmp_qloop_43 = tmp_qloop_28*0.5*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q] + tmp_qloop_33*0.5*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q];
+                const real_t tmp_qloop_44 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_43*2.0;
+                const real_t tmp_qloop_45 = -p_affine_0_0 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_46 = (tmp_qloop_45*tmp_qloop_45);
+                const real_t tmp_qloop_47 = -p_affine_0_1 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_48 = (tmp_qloop_47*tmp_qloop_47);
+                const real_t tmp_qloop_49 = tmp_qloop_46 + tmp_qloop_48;
+                const real_t tmp_qloop_51 = pow(tmp_qloop_49, -0.50000000000000000)*tmp_qloop_50*1.0;
+                const real_t tmp_qloop_52 = tmp_qloop_45*tmp_qloop_51;
+                const real_t tmp_qloop_53 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_45) + tmp_qloop_12*(rayVertex_1 + tmp_qloop_47);
+                const real_t tmp_qloop_54 = pow(tmp_qloop_49, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_55 = tmp_qloop_54*(radRayVertex + tmp_qloop_50*tmp_qloop_53);
+                const real_t tmp_qloop_56 = tmp_qloop_47*tmp_qloop_51;
+                const real_t tmp_qloop_57 = tmp_qloop_54*(radRayVertex + tmp_qloop_50*tmp_qloop_53);
+                const real_t tmp_qloop_58 = tmp_qloop_45*tmp_qloop_47;
+                const real_t tmp_qloop_59 = abs_det_jac_affine_GRAY*(mu_dof_0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*_data_phi_0_0_GRAY[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_52 - tmp_qloop_48*tmp_qloop_55)*(tmp_qloop_12*tmp_qloop_56 + tmp_qloop_46*tmp_qloop_57) - (tmp_qloop_0*tmp_qloop_56 + tmp_qloop_55*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_52 - tmp_qloop_57*tmp_qloop_58))*_data_q_w[q];
+                const real_t tmp_qloop_60 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1];
+                const real_t tmp_qloop_61 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1];
+                const real_t tmp_qloop_62 = tmp_qloop_60 + tmp_qloop_61;
+                const real_t tmp_qloop_63 = tmp_qloop_60*2.0 + tmp_qloop_61*2.0;
+                const real_t tmp_qloop_64 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_40;
+                const real_t tmp_qloop_65 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2];
+                const real_t tmp_qloop_66 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2];
+                const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66;
+                const real_t tmp_qloop_68 = tmp_qloop_65*2.0 + tmp_qloop_66*2.0;
+                const real_t tmp_qloop_69 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_40;
+                const real_t tmp_qloop_70 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3];
+                const real_t tmp_qloop_71 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3];
+                const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71;
+                const real_t tmp_qloop_73 = tmp_qloop_70*2.0 + tmp_qloop_71*2.0;
+                const real_t tmp_qloop_74 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_40;
+                const real_t tmp_qloop_75 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4];
+                const real_t tmp_qloop_76 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4];
+                const real_t tmp_qloop_77 = tmp_qloop_75 + tmp_qloop_76;
+                const real_t tmp_qloop_78 = tmp_qloop_75*2.0 + tmp_qloop_76*2.0;
+                const real_t tmp_qloop_79 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_40;
+                const real_t tmp_qloop_80 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5];
+                const real_t tmp_qloop_81 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5];
+                const real_t tmp_qloop_82 = tmp_qloop_80 + tmp_qloop_81;
+                const real_t tmp_qloop_83 = tmp_qloop_80*2.0 + tmp_qloop_81*2.0;
+                const real_t tmp_qloop_84 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_40;
+                const real_t tmp_qloop_85 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1];
+                const real_t tmp_qloop_86 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1];
+                const real_t tmp_qloop_87 = tmp_qloop_85*0.66666666666666667 + tmp_qloop_86*0.66666666666666667;
+                const real_t tmp_qloop_88 = tmp_qloop_85*1.0 + tmp_qloop_86*1.0;
+                const real_t tmp_qloop_89 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_43*2.0;
+                const real_t tmp_qloop_90 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2];
+                const real_t tmp_qloop_91 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2];
+                const real_t tmp_qloop_92 = tmp_qloop_90*0.66666666666666667 + tmp_qloop_91*0.66666666666666667;
+                const real_t tmp_qloop_93 = tmp_qloop_90*1.0 + tmp_qloop_91*1.0;
+                const real_t tmp_qloop_94 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_43*2.0;
+                const real_t tmp_qloop_95 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3];
+                const real_t tmp_qloop_96 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3];
+                const real_t tmp_qloop_97 = tmp_qloop_95*0.66666666666666667 + tmp_qloop_96*0.66666666666666667;
+                const real_t tmp_qloop_98 = tmp_qloop_95*1.0 + tmp_qloop_96*1.0;
+                const real_t tmp_qloop_99 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_43*2.0;
+                const real_t tmp_qloop_100 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4];
+                const real_t tmp_qloop_101 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4];
+                const real_t tmp_qloop_102 = tmp_qloop_100*0.66666666666666667 + tmp_qloop_101*0.66666666666666667;
+                const real_t tmp_qloop_103 = tmp_qloop_100*1.0 + tmp_qloop_101*1.0;
+                const real_t tmp_qloop_104 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_43*2.0;
+                const real_t tmp_qloop_105 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5];
+                const real_t tmp_qloop_106 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5];
+                const real_t q_tmp_0_0 = tmp_qloop_59*(-tmp_qloop_35*(tmp_qloop_29 + tmp_qloop_31) + tmp_qloop_38 + tmp_qloop_39*(tmp_qloop_29*2.0 + tmp_qloop_31*2.0) + tmp_qloop_44*(tmp_qloop_24*tmp_qloop_30 + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_40));
+                const real_t q_tmp_0_1 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_62 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_63 + tmp_qloop_44*tmp_qloop_64);
+                const real_t q_tmp_0_2 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_67 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_68 + tmp_qloop_44*tmp_qloop_69);
+                const real_t q_tmp_0_3 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_72 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_73 + tmp_qloop_44*tmp_qloop_74);
+                const real_t q_tmp_0_4 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_77 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_78 + tmp_qloop_44*tmp_qloop_79);
+                const real_t q_tmp_0_5 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_82 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_83 + tmp_qloop_44*tmp_qloop_84);
+                const real_t q_tmp_1_1 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_62*tmp_qloop_87 + tmp_qloop_63*tmp_qloop_88 + tmp_qloop_64*tmp_qloop_89);
+                const real_t q_tmp_1_2 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_67*tmp_qloop_87 + tmp_qloop_68*tmp_qloop_88 + tmp_qloop_69*tmp_qloop_89);
+                const real_t q_tmp_1_3 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_72*tmp_qloop_87 + tmp_qloop_73*tmp_qloop_88 + tmp_qloop_74*tmp_qloop_89);
+                const real_t q_tmp_1_4 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_77*tmp_qloop_87 + tmp_qloop_78*tmp_qloop_88 + tmp_qloop_79*tmp_qloop_89);
+                const real_t q_tmp_1_5 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_82*tmp_qloop_87 + tmp_qloop_83*tmp_qloop_88 + tmp_qloop_84*tmp_qloop_89);
+                const real_t q_tmp_2_2 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_67*tmp_qloop_92 + tmp_qloop_68*tmp_qloop_93 + tmp_qloop_69*tmp_qloop_94);
+                const real_t q_tmp_2_3 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_72*tmp_qloop_92 + tmp_qloop_73*tmp_qloop_93 + tmp_qloop_74*tmp_qloop_94);
+                const real_t q_tmp_2_4 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_77*tmp_qloop_92 + tmp_qloop_78*tmp_qloop_93 + tmp_qloop_79*tmp_qloop_94);
+                const real_t q_tmp_2_5 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_82*tmp_qloop_92 + tmp_qloop_83*tmp_qloop_93 + tmp_qloop_84*tmp_qloop_94);
+                const real_t q_tmp_3_3 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_72*tmp_qloop_97 + tmp_qloop_73*tmp_qloop_98 + tmp_qloop_74*tmp_qloop_99);
+                const real_t q_tmp_3_4 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_77*tmp_qloop_97 + tmp_qloop_78*tmp_qloop_98 + tmp_qloop_79*tmp_qloop_99);
+                const real_t q_tmp_3_5 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_82*tmp_qloop_97 + tmp_qloop_83*tmp_qloop_98 + tmp_qloop_84*tmp_qloop_99);
+                const real_t q_tmp_4_4 = tmp_qloop_59*(-tmp_qloop_102*tmp_qloop_77 + tmp_qloop_103*tmp_qloop_78 + tmp_qloop_104*tmp_qloop_79 + tmp_qloop_38);
+                const real_t q_tmp_4_5 = tmp_qloop_59*(-tmp_qloop_102*tmp_qloop_82 + tmp_qloop_103*tmp_qloop_83 + tmp_qloop_104*tmp_qloop_84 + tmp_qloop_38);
+                const real_t q_tmp_5_5 = tmp_qloop_59*(tmp_qloop_38 + tmp_qloop_82*(tmp_qloop_105 + tmp_qloop_106)*-0.66666666666666667 + tmp_qloop_83*(tmp_qloop_105*1.0 + tmp_qloop_106*1.0) + tmp_qloop_84*(tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5] + tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_43)*2.0);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_0_1*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_0_2*src_dof_0 + q_acc_1_2*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_0_3*src_dof_0 + q_acc_1_3*src_dof_1 + q_acc_2_3*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_0_4*src_dof_0 + q_acc_1_4*src_dof_1 + q_acc_2_4*src_dof_2 + q_acc_3_4*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_0_5*src_dof_0 + q_acc_1_5*src_dof_1 + q_acc_2_5*src_dof_2 + q_acc_3_5*src_dof_3 + q_acc_4_5*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_3 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_qloop_6 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_7 = p_affine_0_1 - p_affine_2_1;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_0 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_5 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_8 = p_affine_0_1 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                const real_t tmp_qloop_10 = tmp_qloop_5 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_4) - tmp_qloop_13*(-rayVertex_1 + tmp_qloop_8));
+                const real_t tmp_qloop_21 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_1*tmp_qloop_18 + tmp_qloop_21*tmp_qloop_9;
+                const real_t tmp_qloop_23 = tmp_qloop_17*tmp_qloop_8;
+                const real_t tmp_qloop_24 = -tmp_qloop_13*tmp_qloop_23 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_5*1.0;
+                const real_t tmp_qloop_25 = tmp_qloop_13*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_1*tmp_qloop_23 - tmp_qloop_21*tmp_qloop_4*tmp_qloop_8;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_22*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_22*tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q];
+                const real_t tmp_qloop_30 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q];
+                const real_t tmp_qloop_31 = tmp_qloop_25*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q];
+                const real_t tmp_qloop_33 = tmp_qloop_25*tmp_qloop_27;
+                const real_t tmp_qloop_34 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q];
+                const real_t tmp_qloop_35 = tmp_qloop_32*0.66666666666666667 + tmp_qloop_34*0.66666666666666667;
+                const real_t tmp_qloop_36 = tmp_qloop_24*tmp_qloop_27;
+                const real_t tmp_qloop_37 = -tmp_qloop_26*tmp_qloop_27;
+                const real_t tmp_qloop_38 = (tmp_qloop_36*1.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q] + tmp_qloop_37*1.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q])*(tmp_qloop_36*2.0*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q] + tmp_qloop_37*2.0*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q]);
+                const real_t tmp_qloop_39 = tmp_qloop_32*1.0 + tmp_qloop_34*1.0;
+                const real_t tmp_qloop_40 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q] + tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q];
+                const real_t tmp_qloop_41 = tmp_qloop_36*0.5;
+                const real_t tmp_qloop_42 = tmp_qloop_37*0.5;
+                const real_t tmp_qloop_43 = tmp_qloop_28*0.5*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q] + tmp_qloop_33*0.5*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q];
+                const real_t tmp_qloop_44 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_43*2.0;
+                const real_t tmp_qloop_45 = -p_affine_0_0 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_46 = (tmp_qloop_45*tmp_qloop_45);
+                const real_t tmp_qloop_47 = -p_affine_0_1 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_48 = (tmp_qloop_47*tmp_qloop_47);
+                const real_t tmp_qloop_49 = tmp_qloop_46 + tmp_qloop_48;
+                const real_t tmp_qloop_51 = pow(tmp_qloop_49, -0.50000000000000000)*tmp_qloop_50*1.0;
+                const real_t tmp_qloop_52 = tmp_qloop_45*tmp_qloop_51;
+                const real_t tmp_qloop_53 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_45) + tmp_qloop_12*(rayVertex_1 + tmp_qloop_47);
+                const real_t tmp_qloop_54 = pow(tmp_qloop_49, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_55 = tmp_qloop_54*(radRayVertex + tmp_qloop_50*tmp_qloop_53);
+                const real_t tmp_qloop_56 = tmp_qloop_47*tmp_qloop_51;
+                const real_t tmp_qloop_57 = tmp_qloop_54*(radRayVertex + tmp_qloop_50*tmp_qloop_53);
+                const real_t tmp_qloop_58 = tmp_qloop_45*tmp_qloop_47;
+                const real_t tmp_qloop_59 = abs_det_jac_affine_BLUE*(mu_dof_0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*_data_phi_0_0_BLUE[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_52 - tmp_qloop_48*tmp_qloop_55)*(tmp_qloop_12*tmp_qloop_56 + tmp_qloop_46*tmp_qloop_57) - (tmp_qloop_0*tmp_qloop_56 + tmp_qloop_55*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_52 - tmp_qloop_57*tmp_qloop_58))*_data_q_w[q];
+                const real_t tmp_qloop_60 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1];
+                const real_t tmp_qloop_61 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1];
+                const real_t tmp_qloop_62 = tmp_qloop_60 + tmp_qloop_61;
+                const real_t tmp_qloop_63 = tmp_qloop_60*2.0 + tmp_qloop_61*2.0;
+                const real_t tmp_qloop_64 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_40;
+                const real_t tmp_qloop_65 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2];
+                const real_t tmp_qloop_66 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2];
+                const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66;
+                const real_t tmp_qloop_68 = tmp_qloop_65*2.0 + tmp_qloop_66*2.0;
+                const real_t tmp_qloop_69 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_40;
+                const real_t tmp_qloop_70 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3];
+                const real_t tmp_qloop_71 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3];
+                const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71;
+                const real_t tmp_qloop_73 = tmp_qloop_70*2.0 + tmp_qloop_71*2.0;
+                const real_t tmp_qloop_74 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_40;
+                const real_t tmp_qloop_75 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4];
+                const real_t tmp_qloop_76 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4];
+                const real_t tmp_qloop_77 = tmp_qloop_75 + tmp_qloop_76;
+                const real_t tmp_qloop_78 = tmp_qloop_75*2.0 + tmp_qloop_76*2.0;
+                const real_t tmp_qloop_79 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_40;
+                const real_t tmp_qloop_80 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5];
+                const real_t tmp_qloop_81 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5];
+                const real_t tmp_qloop_82 = tmp_qloop_80 + tmp_qloop_81;
+                const real_t tmp_qloop_83 = tmp_qloop_80*2.0 + tmp_qloop_81*2.0;
+                const real_t tmp_qloop_84 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_40;
+                const real_t tmp_qloop_85 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1];
+                const real_t tmp_qloop_86 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1];
+                const real_t tmp_qloop_87 = tmp_qloop_85*0.66666666666666667 + tmp_qloop_86*0.66666666666666667;
+                const real_t tmp_qloop_88 = tmp_qloop_85*1.0 + tmp_qloop_86*1.0;
+                const real_t tmp_qloop_89 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_43*2.0;
+                const real_t tmp_qloop_90 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2];
+                const real_t tmp_qloop_91 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2];
+                const real_t tmp_qloop_92 = tmp_qloop_90*0.66666666666666667 + tmp_qloop_91*0.66666666666666667;
+                const real_t tmp_qloop_93 = tmp_qloop_90*1.0 + tmp_qloop_91*1.0;
+                const real_t tmp_qloop_94 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_43*2.0;
+                const real_t tmp_qloop_95 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3];
+                const real_t tmp_qloop_96 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3];
+                const real_t tmp_qloop_97 = tmp_qloop_95*0.66666666666666667 + tmp_qloop_96*0.66666666666666667;
+                const real_t tmp_qloop_98 = tmp_qloop_95*1.0 + tmp_qloop_96*1.0;
+                const real_t tmp_qloop_99 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_43*2.0;
+                const real_t tmp_qloop_100 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4];
+                const real_t tmp_qloop_101 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4];
+                const real_t tmp_qloop_102 = tmp_qloop_100*0.66666666666666667 + tmp_qloop_101*0.66666666666666667;
+                const real_t tmp_qloop_103 = tmp_qloop_100*1.0 + tmp_qloop_101*1.0;
+                const real_t tmp_qloop_104 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_43*2.0;
+                const real_t tmp_qloop_105 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5];
+                const real_t tmp_qloop_106 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5];
+                const real_t q_tmp_0_0 = tmp_qloop_59*(-tmp_qloop_35*(tmp_qloop_29 + tmp_qloop_31) + tmp_qloop_38 + tmp_qloop_39*(tmp_qloop_29*2.0 + tmp_qloop_31*2.0) + tmp_qloop_44*(tmp_qloop_24*tmp_qloop_30 + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_40));
+                const real_t q_tmp_0_1 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_62 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_63 + tmp_qloop_44*tmp_qloop_64);
+                const real_t q_tmp_0_2 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_67 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_68 + tmp_qloop_44*tmp_qloop_69);
+                const real_t q_tmp_0_3 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_72 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_73 + tmp_qloop_44*tmp_qloop_74);
+                const real_t q_tmp_0_4 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_77 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_78 + tmp_qloop_44*tmp_qloop_79);
+                const real_t q_tmp_0_5 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_82 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_83 + tmp_qloop_44*tmp_qloop_84);
+                const real_t q_tmp_1_1 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_62*tmp_qloop_87 + tmp_qloop_63*tmp_qloop_88 + tmp_qloop_64*tmp_qloop_89);
+                const real_t q_tmp_1_2 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_67*tmp_qloop_87 + tmp_qloop_68*tmp_qloop_88 + tmp_qloop_69*tmp_qloop_89);
+                const real_t q_tmp_1_3 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_72*tmp_qloop_87 + tmp_qloop_73*tmp_qloop_88 + tmp_qloop_74*tmp_qloop_89);
+                const real_t q_tmp_1_4 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_77*tmp_qloop_87 + tmp_qloop_78*tmp_qloop_88 + tmp_qloop_79*tmp_qloop_89);
+                const real_t q_tmp_1_5 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_82*tmp_qloop_87 + tmp_qloop_83*tmp_qloop_88 + tmp_qloop_84*tmp_qloop_89);
+                const real_t q_tmp_2_2 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_67*tmp_qloop_92 + tmp_qloop_68*tmp_qloop_93 + tmp_qloop_69*tmp_qloop_94);
+                const real_t q_tmp_2_3 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_72*tmp_qloop_92 + tmp_qloop_73*tmp_qloop_93 + tmp_qloop_74*tmp_qloop_94);
+                const real_t q_tmp_2_4 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_77*tmp_qloop_92 + tmp_qloop_78*tmp_qloop_93 + tmp_qloop_79*tmp_qloop_94);
+                const real_t q_tmp_2_5 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_82*tmp_qloop_92 + tmp_qloop_83*tmp_qloop_93 + tmp_qloop_84*tmp_qloop_94);
+                const real_t q_tmp_3_3 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_72*tmp_qloop_97 + tmp_qloop_73*tmp_qloop_98 + tmp_qloop_74*tmp_qloop_99);
+                const real_t q_tmp_3_4 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_77*tmp_qloop_97 + tmp_qloop_78*tmp_qloop_98 + tmp_qloop_79*tmp_qloop_99);
+                const real_t q_tmp_3_5 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_82*tmp_qloop_97 + tmp_qloop_83*tmp_qloop_98 + tmp_qloop_84*tmp_qloop_99);
+                const real_t q_tmp_4_4 = tmp_qloop_59*(-tmp_qloop_102*tmp_qloop_77 + tmp_qloop_103*tmp_qloop_78 + tmp_qloop_104*tmp_qloop_79 + tmp_qloop_38);
+                const real_t q_tmp_4_5 = tmp_qloop_59*(-tmp_qloop_102*tmp_qloop_82 + tmp_qloop_103*tmp_qloop_83 + tmp_qloop_104*tmp_qloop_84 + tmp_qloop_38);
+                const real_t q_tmp_5_5 = tmp_qloop_59*(tmp_qloop_38 + tmp_qloop_82*(tmp_qloop_105 + tmp_qloop_106)*-0.66666666666666667 + tmp_qloop_83*(tmp_qloop_105*1.0 + tmp_qloop_106*1.0) + tmp_qloop_84*(tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5] + tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_43)*2.0);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5;
+             const real_t elMatVec_1 = q_acc_0_1*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5;
+             const real_t elMatVec_2 = q_acc_0_2*src_dof_0 + q_acc_1_2*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5;
+             const real_t elMatVec_3 = q_acc_0_3*src_dof_0 + q_acc_1_3*src_dof_1 + q_acc_2_3*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5;
+             const real_t elMatVec_4 = q_acc_0_4*src_dof_0 + q_acc_1_4*src_dof_1 + q_acc_2_4*src_dof_2 + q_acc_3_4*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5;
+             const real_t elMatVec_5 = q_acc_0_5*src_dof_0 + q_acc_1_5*src_dof_1 + q_acc_2_5*src_dof_2 + q_acc_3_5*src_dof_3 + q_acc_4_5*src_dof_4 + q_acc_5_5*src_dof_5;
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d5d05faf2b3784225511074e75fe25a9b5b5f287
--- /dev/null
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp
@@ -0,0 +1,425 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseFullStokesAnnulusMap_1_1.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseFullStokesAnnulusMap_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_Edge, real_t * RESTRICT  _data_invDiag_Vertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (-tmp_qloop_1*tmp_qloop_11 + tmp_qloop_13*tmp_qloop_14);
+       const real_t tmp_qloop_47 = tmp_qloop_15*1.0 / (tmp_qloop_0*tmp_qloop_11 - tmp_qloop_12*tmp_qloop_14);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_3 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_qloop_6 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_7 = p_affine_0_1 - p_affine_2_1;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_0 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_5 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_8 = p_affine_0_1 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                const real_t tmp_qloop_10 = tmp_qloop_5 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_4) - tmp_qloop_13*(-rayVertex_1 + tmp_qloop_8));
+                const real_t tmp_qloop_21 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_1*tmp_qloop_18 + tmp_qloop_21*tmp_qloop_9;
+                const real_t tmp_qloop_23 = tmp_qloop_17*tmp_qloop_8;
+                const real_t tmp_qloop_24 = -tmp_qloop_13*tmp_qloop_23 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_5*1.0;
+                const real_t tmp_qloop_25 = tmp_qloop_13*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_1*tmp_qloop_23 - tmp_qloop_21*tmp_qloop_4*tmp_qloop_8;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_22*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_22*tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q];
+                const real_t tmp_qloop_30 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q];
+                const real_t tmp_qloop_31 = tmp_qloop_25*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q];
+                const real_t tmp_qloop_33 = tmp_qloop_25*tmp_qloop_27;
+                const real_t tmp_qloop_34 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q];
+                const real_t tmp_qloop_35 = tmp_qloop_24*tmp_qloop_27;
+                const real_t tmp_qloop_36 = -tmp_qloop_26*tmp_qloop_27;
+                const real_t tmp_qloop_37 = (tmp_qloop_35*1.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q] + tmp_qloop_36*1.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q])*(tmp_qloop_35*2.0*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q] + tmp_qloop_36*2.0*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q]);
+                const real_t tmp_qloop_38 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q] + tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q];
+                const real_t tmp_qloop_39 = tmp_qloop_35*0.5;
+                const real_t tmp_qloop_40 = tmp_qloop_36*0.5;
+                const real_t tmp_qloop_41 = tmp_qloop_28*0.5*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q] + tmp_qloop_33*0.5*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q];
+                const real_t tmp_qloop_42 = -p_affine_0_0 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_43 = (tmp_qloop_42*tmp_qloop_42);
+                const real_t tmp_qloop_44 = -p_affine_0_1 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_45 = (tmp_qloop_44*tmp_qloop_44);
+                const real_t tmp_qloop_46 = tmp_qloop_43 + tmp_qloop_45;
+                const real_t tmp_qloop_48 = pow(tmp_qloop_46, -0.50000000000000000)*tmp_qloop_47*1.0;
+                const real_t tmp_qloop_49 = tmp_qloop_42*tmp_qloop_48;
+                const real_t tmp_qloop_50 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_42) + tmp_qloop_12*(rayVertex_1 + tmp_qloop_44);
+                const real_t tmp_qloop_51 = pow(tmp_qloop_46, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_52 = tmp_qloop_51*(radRayVertex + tmp_qloop_47*tmp_qloop_50);
+                const real_t tmp_qloop_53 = tmp_qloop_44*tmp_qloop_48;
+                const real_t tmp_qloop_54 = tmp_qloop_51*(radRayVertex + tmp_qloop_47*tmp_qloop_50);
+                const real_t tmp_qloop_55 = tmp_qloop_42*tmp_qloop_44;
+                const real_t tmp_qloop_56 = abs_det_jac_affine_GRAY*(mu_dof_0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*_data_phi_0_0_GRAY[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_49 - tmp_qloop_45*tmp_qloop_52)*(tmp_qloop_12*tmp_qloop_53 + tmp_qloop_43*tmp_qloop_54) - (tmp_qloop_0*tmp_qloop_53 + tmp_qloop_52*tmp_qloop_55)*(tmp_qloop_12*tmp_qloop_49 - tmp_qloop_54*tmp_qloop_55))*_data_q_w[q];
+                const real_t tmp_qloop_57 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1];
+                const real_t tmp_qloop_58 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1];
+                const real_t tmp_qloop_59 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1];
+                const real_t tmp_qloop_60 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1];
+                const real_t tmp_qloop_61 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2];
+                const real_t tmp_qloop_62 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2];
+                const real_t tmp_qloop_63 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2];
+                const real_t tmp_qloop_64 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2];
+                const real_t tmp_qloop_65 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3];
+                const real_t tmp_qloop_66 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3];
+                const real_t tmp_qloop_67 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3];
+                const real_t tmp_qloop_68 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3];
+                const real_t tmp_qloop_69 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4];
+                const real_t tmp_qloop_70 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4];
+                const real_t tmp_qloop_71 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4];
+                const real_t tmp_qloop_72 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4];
+                const real_t tmp_qloop_73 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5];
+                const real_t tmp_qloop_74 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5];
+                const real_t tmp_qloop_75 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5];
+                const real_t tmp_qloop_76 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5];
+                const real_t q_tmp_0_0 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_29 + tmp_qloop_31)*(tmp_qloop_32 + tmp_qloop_34)*-0.66666666666666667 + (tmp_qloop_29*2.0 + tmp_qloop_31*2.0)*(tmp_qloop_32*1.0 + tmp_qloop_34*1.0) + (tmp_qloop_24*tmp_qloop_30 + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_41)*2.0);
+                const real_t q_tmp_1_1 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_57 + tmp_qloop_58)*(tmp_qloop_59 + tmp_qloop_60)*-0.66666666666666667 + (tmp_qloop_57*2.0 + tmp_qloop_58*2.0)*(tmp_qloop_59*1.0 + tmp_qloop_60*1.0) + (tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_41)*2.0);
+                const real_t q_tmp_2_2 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_61 + tmp_qloop_62)*(tmp_qloop_63 + tmp_qloop_64)*-0.66666666666666667 + (tmp_qloop_61*2.0 + tmp_qloop_62*2.0)*(tmp_qloop_63*1.0 + tmp_qloop_64*1.0) + (tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_41)*2.0);
+                const real_t q_tmp_3_3 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_65 + tmp_qloop_66)*(tmp_qloop_67 + tmp_qloop_68)*-0.66666666666666667 + (tmp_qloop_65*2.0 + tmp_qloop_66*2.0)*(tmp_qloop_67*1.0 + tmp_qloop_68*1.0) + (tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_41)*2.0);
+                const real_t q_tmp_4_4 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_69 + tmp_qloop_70)*(tmp_qloop_71 + tmp_qloop_72)*-0.66666666666666667 + (tmp_qloop_69*2.0 + tmp_qloop_70*2.0)*(tmp_qloop_71*1.0 + tmp_qloop_72*1.0) + (tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_41)*2.0);
+                const real_t q_tmp_5_5 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_73 + tmp_qloop_74)*(tmp_qloop_75 + tmp_qloop_76)*-0.66666666666666667 + (tmp_qloop_73*2.0 + tmp_qloop_74*2.0)*(tmp_qloop_75*1.0 + tmp_qloop_76*1.0) + (tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_41)*2.0);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatDiag_0 = q_acc_0_0;
+             const real_t elMatDiag_1 = q_acc_1_1;
+             const real_t elMatDiag_2 = q_acc_2_2;
+             const real_t elMatDiag_3 = q_acc_3_3;
+             const real_t elMatDiag_4 = q_acc_4_4;
+             const real_t elMatDiag_5 = q_acc_5_5;
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_3 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_qloop_6 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_7 = p_affine_0_1 - p_affine_2_1;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_0 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_5 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_8 = p_affine_0_1 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                const real_t tmp_qloop_10 = tmp_qloop_5 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_4) - tmp_qloop_13*(-rayVertex_1 + tmp_qloop_8));
+                const real_t tmp_qloop_21 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_1*tmp_qloop_18 + tmp_qloop_21*tmp_qloop_9;
+                const real_t tmp_qloop_23 = tmp_qloop_17*tmp_qloop_8;
+                const real_t tmp_qloop_24 = -tmp_qloop_13*tmp_qloop_23 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_5*1.0;
+                const real_t tmp_qloop_25 = tmp_qloop_13*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_1*tmp_qloop_23 - tmp_qloop_21*tmp_qloop_4*tmp_qloop_8;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_22*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_22*tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q];
+                const real_t tmp_qloop_30 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q];
+                const real_t tmp_qloop_31 = tmp_qloop_25*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q];
+                const real_t tmp_qloop_33 = tmp_qloop_25*tmp_qloop_27;
+                const real_t tmp_qloop_34 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q];
+                const real_t tmp_qloop_35 = tmp_qloop_24*tmp_qloop_27;
+                const real_t tmp_qloop_36 = -tmp_qloop_26*tmp_qloop_27;
+                const real_t tmp_qloop_37 = (tmp_qloop_35*1.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q] + tmp_qloop_36*1.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q])*(tmp_qloop_35*2.0*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q] + tmp_qloop_36*2.0*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q]);
+                const real_t tmp_qloop_38 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q] + tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q];
+                const real_t tmp_qloop_39 = tmp_qloop_35*0.5;
+                const real_t tmp_qloop_40 = tmp_qloop_36*0.5;
+                const real_t tmp_qloop_41 = tmp_qloop_28*0.5*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q] + tmp_qloop_33*0.5*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q];
+                const real_t tmp_qloop_42 = -p_affine_0_0 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_43 = (tmp_qloop_42*tmp_qloop_42);
+                const real_t tmp_qloop_44 = -p_affine_0_1 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_45 = (tmp_qloop_44*tmp_qloop_44);
+                const real_t tmp_qloop_46 = tmp_qloop_43 + tmp_qloop_45;
+                const real_t tmp_qloop_48 = pow(tmp_qloop_46, -0.50000000000000000)*tmp_qloop_47*1.0;
+                const real_t tmp_qloop_49 = tmp_qloop_42*tmp_qloop_48;
+                const real_t tmp_qloop_50 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_42) + tmp_qloop_12*(rayVertex_1 + tmp_qloop_44);
+                const real_t tmp_qloop_51 = pow(tmp_qloop_46, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_52 = tmp_qloop_51*(radRayVertex + tmp_qloop_47*tmp_qloop_50);
+                const real_t tmp_qloop_53 = tmp_qloop_44*tmp_qloop_48;
+                const real_t tmp_qloop_54 = tmp_qloop_51*(radRayVertex + tmp_qloop_47*tmp_qloop_50);
+                const real_t tmp_qloop_55 = tmp_qloop_42*tmp_qloop_44;
+                const real_t tmp_qloop_56 = abs_det_jac_affine_BLUE*(mu_dof_0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*_data_phi_0_0_BLUE[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_49 - tmp_qloop_45*tmp_qloop_52)*(tmp_qloop_12*tmp_qloop_53 + tmp_qloop_43*tmp_qloop_54) - (tmp_qloop_0*tmp_qloop_53 + tmp_qloop_52*tmp_qloop_55)*(tmp_qloop_12*tmp_qloop_49 - tmp_qloop_54*tmp_qloop_55))*_data_q_w[q];
+                const real_t tmp_qloop_57 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1];
+                const real_t tmp_qloop_58 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1];
+                const real_t tmp_qloop_59 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1];
+                const real_t tmp_qloop_60 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1];
+                const real_t tmp_qloop_61 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2];
+                const real_t tmp_qloop_62 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2];
+                const real_t tmp_qloop_63 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2];
+                const real_t tmp_qloop_64 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2];
+                const real_t tmp_qloop_65 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3];
+                const real_t tmp_qloop_66 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3];
+                const real_t tmp_qloop_67 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3];
+                const real_t tmp_qloop_68 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3];
+                const real_t tmp_qloop_69 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4];
+                const real_t tmp_qloop_70 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4];
+                const real_t tmp_qloop_71 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4];
+                const real_t tmp_qloop_72 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4];
+                const real_t tmp_qloop_73 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5];
+                const real_t tmp_qloop_74 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5];
+                const real_t tmp_qloop_75 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5];
+                const real_t tmp_qloop_76 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5];
+                const real_t q_tmp_0_0 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_29 + tmp_qloop_31)*(tmp_qloop_32 + tmp_qloop_34)*-0.66666666666666667 + (tmp_qloop_29*2.0 + tmp_qloop_31*2.0)*(tmp_qloop_32*1.0 + tmp_qloop_34*1.0) + (tmp_qloop_24*tmp_qloop_30 + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_41)*2.0);
+                const real_t q_tmp_1_1 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_57 + tmp_qloop_58)*(tmp_qloop_59 + tmp_qloop_60)*-0.66666666666666667 + (tmp_qloop_57*2.0 + tmp_qloop_58*2.0)*(tmp_qloop_59*1.0 + tmp_qloop_60*1.0) + (tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_41)*2.0);
+                const real_t q_tmp_2_2 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_61 + tmp_qloop_62)*(tmp_qloop_63 + tmp_qloop_64)*-0.66666666666666667 + (tmp_qloop_61*2.0 + tmp_qloop_62*2.0)*(tmp_qloop_63*1.0 + tmp_qloop_64*1.0) + (tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_41)*2.0);
+                const real_t q_tmp_3_3 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_65 + tmp_qloop_66)*(tmp_qloop_67 + tmp_qloop_68)*-0.66666666666666667 + (tmp_qloop_65*2.0 + tmp_qloop_66*2.0)*(tmp_qloop_67*1.0 + tmp_qloop_68*1.0) + (tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_41)*2.0);
+                const real_t q_tmp_4_4 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_69 + tmp_qloop_70)*(tmp_qloop_71 + tmp_qloop_72)*-0.66666666666666667 + (tmp_qloop_69*2.0 + tmp_qloop_70*2.0)*(tmp_qloop_71*1.0 + tmp_qloop_72*1.0) + (tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_41)*2.0);
+                const real_t q_tmp_5_5 = tmp_qloop_56*(tmp_qloop_37 + (tmp_qloop_73 + tmp_qloop_74)*(tmp_qloop_75 + tmp_qloop_76)*-0.66666666666666667 + (tmp_qloop_73*2.0 + tmp_qloop_74*2.0)*(tmp_qloop_75*1.0 + tmp_qloop_76*1.0) + (tmp_qloop_35*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5] + tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_38)*(tmp_qloop_39*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5] + tmp_qloop_40*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_41)*2.0);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMatDiag_0 = q_acc_0_0;
+             const real_t elMatDiag_1 = q_acc_1_1;
+             const real_t elMatDiag_2 = q_acc_2_2;
+             const real_t elMatDiag_3 = q_acc_3_3;
+             const real_t elMatDiag_4 = q_acc_4_4;
+             const real_t elMatDiag_5 = q_acc_5_5;
+             _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7808efb57333adba042c4ab360943336431642f6
--- /dev/null
+++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_macro_2D.cpp
@@ -0,0 +1,745 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P2ElementwiseFullStokesAnnulusMap_1_1.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P2ElementwiseFullStokesAnnulusMap_1_1::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_muEdge, real_t * RESTRICT  _data_muVertex, idx_t * RESTRICT  _data_srcEdge, idx_t * RESTRICT  _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669};
+   
+       const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001};
+   
+       const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_1 = -tmp_qloop_0;
+       const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0;
+       const real_t tmp_qloop_12 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_13 = -tmp_qloop_12;
+       const real_t tmp_qloop_14 = rayVertex_1 - refVertex_1;
+       const real_t tmp_qloop_15 = radRayVertex - radRefVertex;
+       const real_t tmp_qloop_16 = -tmp_qloop_15*1.0 / (-tmp_qloop_1*tmp_qloop_11 + tmp_qloop_13*tmp_qloop_14);
+       const real_t tmp_qloop_50 = tmp_qloop_15*1.0 / (tmp_qloop_0*tmp_qloop_11 - tmp_qloop_12*tmp_qloop_14);
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_0_0_GRAY [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_GRAY [] = {((real_t)(jac_affine_inv_0_0_GRAY*-0.33333333333333348 + jac_affine_inv_1_0_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3333333333333333 + jac_affine_inv_1_0_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.3333333333333333 + jac_affine_inv_1_0_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_0_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999973 + jac_affine_inv_1_0_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*2.3999999999999999 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-2.3999999999999999 + jac_affine_inv_1_0_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*0.19999999999999996 + jac_affine_inv_1_0_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.5999999999999999 + jac_affine_inv_1_0_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_GRAY*-1.4000000000000001 + jac_affine_inv_1_0_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_GRAY*0.80000000000000004 + jac_affine_inv_1_0_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_GRAY*-0.80000000000000004 + jac_affine_inv_1_0_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_GRAY*1.5999999999999999 + jac_affine_inv_1_0_GRAY*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_GRAY [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_GRAY [] = {((real_t)(jac_affine_inv_0_1_GRAY*-0.33333333333333348 + jac_affine_inv_1_1_GRAY*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_GRAY*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3333333333333333 + jac_affine_inv_1_1_GRAY*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.3333333333333333 + jac_affine_inv_1_1_GRAY*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_GRAY*2.2204460492503131e-16 + jac_affine_inv_1_1_GRAY*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999973 + jac_affine_inv_1_1_GRAY*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*2.3999999999999999 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-2.3999999999999999 + jac_affine_inv_1_1_GRAY*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*0.19999999999999996 + jac_affine_inv_1_1_GRAY*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.5999999999999999 + jac_affine_inv_1_1_GRAY*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_GRAY*-1.4000000000000001 + jac_affine_inv_1_1_GRAY*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_GRAY*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_GRAY*0.80000000000000004 + jac_affine_inv_1_1_GRAY*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_GRAY*-0.80000000000000004 + jac_affine_inv_1_1_GRAY*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_GRAY*1.5999999999999999 + jac_affine_inv_1_1_GRAY*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_3 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_qloop_6 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_7 = p_affine_0_1 - p_affine_2_1;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_0 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_5 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_8 = p_affine_0_1 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                const real_t tmp_qloop_10 = tmp_qloop_5 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_4) - tmp_qloop_13*(-rayVertex_1 + tmp_qloop_8));
+                const real_t tmp_qloop_21 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_1*tmp_qloop_18 + tmp_qloop_21*tmp_qloop_9;
+                const real_t tmp_qloop_23 = tmp_qloop_17*tmp_qloop_8;
+                const real_t tmp_qloop_24 = -tmp_qloop_13*tmp_qloop_23 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_5*1.0;
+                const real_t tmp_qloop_25 = tmp_qloop_13*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_1*tmp_qloop_23 - tmp_qloop_21*tmp_qloop_4*tmp_qloop_8;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_22*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_22*tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q];
+                const real_t tmp_qloop_30 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q];
+                const real_t tmp_qloop_31 = tmp_qloop_25*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q];
+                const real_t tmp_qloop_33 = tmp_qloop_25*tmp_qloop_27;
+                const real_t tmp_qloop_34 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q];
+                const real_t tmp_qloop_35 = tmp_qloop_32*0.66666666666666667 + tmp_qloop_34*0.66666666666666667;
+                const real_t tmp_qloop_36 = tmp_qloop_24*tmp_qloop_27;
+                const real_t tmp_qloop_37 = -tmp_qloop_26*tmp_qloop_27;
+                const real_t tmp_qloop_38 = (tmp_qloop_36*1.0*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q] + tmp_qloop_37*1.0*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q])*(tmp_qloop_36*2.0*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q] + tmp_qloop_37*2.0*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q]);
+                const real_t tmp_qloop_39 = tmp_qloop_32*1.0 + tmp_qloop_34*1.0;
+                const real_t tmp_qloop_40 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_0_GRAY[q] + tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_0_GRAY[q];
+                const real_t tmp_qloop_41 = tmp_qloop_36*0.5;
+                const real_t tmp_qloop_42 = tmp_qloop_37*0.5;
+                const real_t tmp_qloop_43 = tmp_qloop_28*0.5*_data_jac_affine_inv_T_grad_psi_1_0_GRAY[q] + tmp_qloop_33*0.5*_data_jac_affine_inv_T_grad_psi_0_0_GRAY[q];
+                const real_t tmp_qloop_44 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q] + tmp_qloop_43*2.0;
+                const real_t tmp_qloop_45 = -p_affine_0_0 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_46 = (tmp_qloop_45*tmp_qloop_45);
+                const real_t tmp_qloop_47 = -p_affine_0_1 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_48 = (tmp_qloop_47*tmp_qloop_47);
+                const real_t tmp_qloop_49 = tmp_qloop_46 + tmp_qloop_48;
+                const real_t tmp_qloop_51 = pow(tmp_qloop_49, -0.50000000000000000)*tmp_qloop_50*1.0;
+                const real_t tmp_qloop_52 = tmp_qloop_45*tmp_qloop_51;
+                const real_t tmp_qloop_53 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_45) + tmp_qloop_12*(rayVertex_1 + tmp_qloop_47);
+                const real_t tmp_qloop_54 = pow(tmp_qloop_49, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_55 = tmp_qloop_54*(radRayVertex + tmp_qloop_50*tmp_qloop_53);
+                const real_t tmp_qloop_56 = tmp_qloop_47*tmp_qloop_51;
+                const real_t tmp_qloop_57 = tmp_qloop_54*(radRayVertex + tmp_qloop_50*tmp_qloop_53);
+                const real_t tmp_qloop_58 = tmp_qloop_45*tmp_qloop_47;
+                const real_t tmp_qloop_59 = abs_det_jac_affine_GRAY*(mu_dof_0*_data_phi_0_0_GRAY[6*q] + mu_dof_1*_data_phi_0_0_GRAY[6*q + 1] + mu_dof_2*_data_phi_0_0_GRAY[6*q + 2] + mu_dof_3*_data_phi_0_0_GRAY[6*q + 3] + mu_dof_4*_data_phi_0_0_GRAY[6*q + 4] + mu_dof_5*_data_phi_0_0_GRAY[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_52 - tmp_qloop_48*tmp_qloop_55)*(tmp_qloop_12*tmp_qloop_56 + tmp_qloop_46*tmp_qloop_57) - (tmp_qloop_0*tmp_qloop_56 + tmp_qloop_55*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_52 - tmp_qloop_57*tmp_qloop_58))*_data_q_w[q];
+                const real_t tmp_qloop_60 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1];
+                const real_t tmp_qloop_61 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1];
+                const real_t tmp_qloop_62 = tmp_qloop_60 + tmp_qloop_61;
+                const real_t tmp_qloop_63 = tmp_qloop_60*2.0 + tmp_qloop_61*2.0;
+                const real_t tmp_qloop_64 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 1] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 1] + tmp_qloop_40;
+                const real_t tmp_qloop_65 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2];
+                const real_t tmp_qloop_66 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2];
+                const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66;
+                const real_t tmp_qloop_68 = tmp_qloop_65*2.0 + tmp_qloop_66*2.0;
+                const real_t tmp_qloop_69 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 2] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 2] + tmp_qloop_40;
+                const real_t tmp_qloop_70 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3];
+                const real_t tmp_qloop_71 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3];
+                const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71;
+                const real_t tmp_qloop_73 = tmp_qloop_70*2.0 + tmp_qloop_71*2.0;
+                const real_t tmp_qloop_74 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 3] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 3] + tmp_qloop_40;
+                const real_t tmp_qloop_75 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4];
+                const real_t tmp_qloop_76 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4];
+                const real_t tmp_qloop_77 = tmp_qloop_75 + tmp_qloop_76;
+                const real_t tmp_qloop_78 = tmp_qloop_75*2.0 + tmp_qloop_76*2.0;
+                const real_t tmp_qloop_79 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 4] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 4] + tmp_qloop_40;
+                const real_t tmp_qloop_80 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5];
+                const real_t tmp_qloop_81 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5];
+                const real_t tmp_qloop_82 = tmp_qloop_80 + tmp_qloop_81;
+                const real_t tmp_qloop_83 = tmp_qloop_80*2.0 + tmp_qloop_81*2.0;
+                const real_t tmp_qloop_84 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_GRAY[6*q + 5] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q + 5] + tmp_qloop_40;
+                const real_t tmp_qloop_85 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1];
+                const real_t tmp_qloop_86 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1];
+                const real_t tmp_qloop_87 = tmp_qloop_85*0.66666666666666667 + tmp_qloop_86*0.66666666666666667;
+                const real_t tmp_qloop_88 = tmp_qloop_85*1.0 + tmp_qloop_86*1.0;
+                const real_t tmp_qloop_89 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 1] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 1] + tmp_qloop_43*2.0;
+                const real_t tmp_qloop_90 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2];
+                const real_t tmp_qloop_91 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2];
+                const real_t tmp_qloop_92 = tmp_qloop_90*0.66666666666666667 + tmp_qloop_91*0.66666666666666667;
+                const real_t tmp_qloop_93 = tmp_qloop_90*1.0 + tmp_qloop_91*1.0;
+                const real_t tmp_qloop_94 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 2] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 2] + tmp_qloop_43*2.0;
+                const real_t tmp_qloop_95 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3];
+                const real_t tmp_qloop_96 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3];
+                const real_t tmp_qloop_97 = tmp_qloop_95*0.66666666666666667 + tmp_qloop_96*0.66666666666666667;
+                const real_t tmp_qloop_98 = tmp_qloop_95*1.0 + tmp_qloop_96*1.0;
+                const real_t tmp_qloop_99 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 3] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 3] + tmp_qloop_43*2.0;
+                const real_t tmp_qloop_100 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4];
+                const real_t tmp_qloop_101 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4];
+                const real_t tmp_qloop_102 = tmp_qloop_100*0.66666666666666667 + tmp_qloop_101*0.66666666666666667;
+                const real_t tmp_qloop_103 = tmp_qloop_100*1.0 + tmp_qloop_101*1.0;
+                const real_t tmp_qloop_104 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 4] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 4] + tmp_qloop_43*2.0;
+                const real_t tmp_qloop_105 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5];
+                const real_t tmp_qloop_106 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5];
+                const real_t q_tmp_0_0 = tmp_qloop_59*(-tmp_qloop_35*(tmp_qloop_29 + tmp_qloop_31) + tmp_qloop_38 + tmp_qloop_39*(tmp_qloop_29*2.0 + tmp_qloop_31*2.0) + tmp_qloop_44*(tmp_qloop_24*tmp_qloop_30 + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_GRAY[6*q] + tmp_qloop_40));
+                const real_t q_tmp_0_1 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_62 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_63 + tmp_qloop_44*tmp_qloop_64);
+                const real_t q_tmp_0_2 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_67 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_68 + tmp_qloop_44*tmp_qloop_69);
+                const real_t q_tmp_0_3 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_72 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_73 + tmp_qloop_44*tmp_qloop_74);
+                const real_t q_tmp_0_4 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_77 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_78 + tmp_qloop_44*tmp_qloop_79);
+                const real_t q_tmp_0_5 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_82 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_83 + tmp_qloop_44*tmp_qloop_84);
+                const real_t q_tmp_1_1 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_62*tmp_qloop_87 + tmp_qloop_63*tmp_qloop_88 + tmp_qloop_64*tmp_qloop_89);
+                const real_t q_tmp_1_2 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_67*tmp_qloop_87 + tmp_qloop_68*tmp_qloop_88 + tmp_qloop_69*tmp_qloop_89);
+                const real_t q_tmp_1_3 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_72*tmp_qloop_87 + tmp_qloop_73*tmp_qloop_88 + tmp_qloop_74*tmp_qloop_89);
+                const real_t q_tmp_1_4 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_77*tmp_qloop_87 + tmp_qloop_78*tmp_qloop_88 + tmp_qloop_79*tmp_qloop_89);
+                const real_t q_tmp_1_5 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_82*tmp_qloop_87 + tmp_qloop_83*tmp_qloop_88 + tmp_qloop_84*tmp_qloop_89);
+                const real_t q_tmp_2_2 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_67*tmp_qloop_92 + tmp_qloop_68*tmp_qloop_93 + tmp_qloop_69*tmp_qloop_94);
+                const real_t q_tmp_2_3 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_72*tmp_qloop_92 + tmp_qloop_73*tmp_qloop_93 + tmp_qloop_74*tmp_qloop_94);
+                const real_t q_tmp_2_4 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_77*tmp_qloop_92 + tmp_qloop_78*tmp_qloop_93 + tmp_qloop_79*tmp_qloop_94);
+                const real_t q_tmp_2_5 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_82*tmp_qloop_92 + tmp_qloop_83*tmp_qloop_93 + tmp_qloop_84*tmp_qloop_94);
+                const real_t q_tmp_3_3 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_72*tmp_qloop_97 + tmp_qloop_73*tmp_qloop_98 + tmp_qloop_74*tmp_qloop_99);
+                const real_t q_tmp_3_4 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_77*tmp_qloop_97 + tmp_qloop_78*tmp_qloop_98 + tmp_qloop_79*tmp_qloop_99);
+                const real_t q_tmp_3_5 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_82*tmp_qloop_97 + tmp_qloop_83*tmp_qloop_98 + tmp_qloop_84*tmp_qloop_99);
+                const real_t q_tmp_4_4 = tmp_qloop_59*(-tmp_qloop_102*tmp_qloop_77 + tmp_qloop_103*tmp_qloop_78 + tmp_qloop_104*tmp_qloop_79 + tmp_qloop_38);
+                const real_t q_tmp_4_5 = tmp_qloop_59*(-tmp_qloop_102*tmp_qloop_82 + tmp_qloop_103*tmp_qloop_83 + tmp_qloop_104*tmp_qloop_84 + tmp_qloop_38);
+                const real_t q_tmp_5_5 = tmp_qloop_59*(tmp_qloop_38 + tmp_qloop_82*(tmp_qloop_105 + tmp_qloop_106)*-0.66666666666666667 + tmp_qloop_83*(tmp_qloop_105*1.0 + tmp_qloop_106*1.0) + tmp_qloop_84*(tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_1_GRAY[6*q + 5] + tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_1_GRAY[6*q + 5] + tmp_qloop_43)*2.0);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_0_1;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_0_2;
+             const real_t elMat_2_1 = q_acc_1_2;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_0_3;
+             const real_t elMat_3_1 = q_acc_1_3;
+             const real_t elMat_3_2 = q_acc_2_3;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_0_4;
+             const real_t elMat_4_1 = q_acc_1_4;
+             const real_t elMat_4_2 = q_acc_2_4;
+             const real_t elMat_4_3 = q_acc_3_4;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_0_5;
+             const real_t elMat_5_1 = q_acc_1_5;
+             const real_t elMat_5_2 = q_acc_2_5;
+             const real_t elMat_5_3 = q_acc_3_5;
+             const real_t elMat_5_4 = q_acc_4_5;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_0_0_BLUE [] = {-0.11111111111111116, -0.1111111111111111, -0.1111111111111111, 0.44444444444444442, 0.44444444444444442, 0.44444444444444442, -0.11999999999999988, -0.12, 0.12, 0.47999999999999998, 0.47999999999999998, 0.16000000000000003, -0.11999999999999988, 0.12, -0.12, 0.47999999999999998, 0.16000000000000003, 0.47999999999999998, 0.11999999999999988, -0.12, -0.12, 0.16000000000000003, 0.47999999999999998, 0.47999999999999998};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_phi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_0_1_BLUE [] = {((real_t)(jac_affine_inv_0_0_BLUE*-0.33333333333333348 + jac_affine_inv_1_0_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_0_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3333333333333333 + jac_affine_inv_1_0_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.3333333333333333 + jac_affine_inv_1_0_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_0_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_0_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999973 + jac_affine_inv_1_0_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*2.3999999999999999 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-2.3999999999999999 + jac_affine_inv_1_0_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*0.19999999999999996 + jac_affine_inv_1_0_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.5999999999999999 + jac_affine_inv_1_0_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_0_BLUE*-1.4000000000000001 + jac_affine_inv_1_0_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_0_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_0_BLUE*0.80000000000000004 + jac_affine_inv_1_0_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_0_BLUE*-0.80000000000000004 + jac_affine_inv_1_0_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_0_BLUE*1.5999999999999999 + jac_affine_inv_1_0_BLUE*-0.80000000000000004))};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_0_BLUE [] = {0.0, 0.0, 0.0, 0.0};
+      
+          const real_t _data_jac_affine_inv_T_grad_psi_1_1_BLUE [] = {((real_t)(jac_affine_inv_0_1_BLUE*-0.33333333333333348 + jac_affine_inv_1_1_BLUE*-0.33333333333333348)), ((real_t)(jac_affine_inv_0_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_1_1_BLUE*0.33333333333333326)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3333333333333333 + jac_affine_inv_1_1_BLUE*1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.3333333333333333 + jac_affine_inv_1_1_BLUE*2.2204460492503131e-16)), ((real_t)(jac_affine_inv_0_1_BLUE*2.2204460492503131e-16 + jac_affine_inv_1_1_BLUE*-1.3333333333333333)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999973 + jac_affine_inv_1_1_BLUE*0.19999999999999973)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*2.3999999999999999 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-2.3999999999999999 + jac_affine_inv_1_1_BLUE*-1.5999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*0.19999999999999996 + jac_affine_inv_1_1_BLUE*0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*1.3999999999999999)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.5999999999999999 + jac_affine_inv_1_1_BLUE*-2.3999999999999999)), ((real_t)(jac_affine_inv_0_1_BLUE*-1.4000000000000001 + jac_affine_inv_1_1_BLUE*-1.4000000000000001)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_1_1_BLUE*-0.19999999999999996)), ((real_t)(jac_affine_inv_0_1_BLUE*0.80000000000000004 + jac_affine_inv_1_1_BLUE*0.80000000000000004)), ((real_t)(jac_affine_inv_0_1_BLUE*-0.80000000000000004 + jac_affine_inv_1_1_BLUE*1.5999999999999999)), 
((real_t)(jac_affine_inv_0_1_BLUE*1.5999999999999999 + jac_affine_inv_1_1_BLUE*-0.80000000000000004))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t mu_dof_0 = _data_muVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t mu_dof_1 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_2 = _data_muVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t mu_dof_3 = _data_muEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t mu_dof_4 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             const real_t mu_dof_5 = _data_muEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_0_3 = 0.0;
+             real_t q_acc_0_4 = 0.0;
+             real_t q_acc_0_5 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_1_3 = 0.0;
+             real_t q_acc_1_4 = 0.0;
+             real_t q_acc_1_5 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             real_t q_acc_2_3 = 0.0;
+             real_t q_acc_2_4 = 0.0;
+             real_t q_acc_2_5 = 0.0;
+             real_t q_acc_3_3 = 0.0;
+             real_t q_acc_3_4 = 0.0;
+             real_t q_acc_3_5 = 0.0;
+             real_t q_acc_4_4 = 0.0;
+             real_t q_acc_4_5 = 0.0;
+             real_t q_acc_5_5 = 0.0;
+             const real_t tmp_qloop_2 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_qloop_3 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_qloop_6 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_qloop_7 = p_affine_0_1 - p_affine_2_1;
+             for (int64_t q = 0; q < 4; q += 1)
+             {
+                const real_t tmp_qloop_4 = p_affine_0_0 - tmp_qloop_2*_data_q_p_0[q] - tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_5 = (tmp_qloop_4*tmp_qloop_4);
+                const real_t tmp_qloop_8 = p_affine_0_1 - tmp_qloop_6*_data_q_p_0[q] - tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_9 = (tmp_qloop_8*tmp_qloop_8);
+                const real_t tmp_qloop_10 = tmp_qloop_5 + tmp_qloop_9;
+                const real_t tmp_qloop_17 = pow(tmp_qloop_10, -0.50000000000000000)*tmp_qloop_16*1.0;
+                const real_t tmp_qloop_18 = tmp_qloop_17*tmp_qloop_4;
+                const real_t tmp_qloop_19 = pow(tmp_qloop_10, -1.5000000000000000);
+                const real_t tmp_qloop_20 = radRayVertex + tmp_qloop_16*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_4) - tmp_qloop_13*(-rayVertex_1 + tmp_qloop_8));
+                const real_t tmp_qloop_21 = tmp_qloop_19*tmp_qloop_20*1.0;
+                const real_t tmp_qloop_22 = tmp_qloop_1*tmp_qloop_18 + tmp_qloop_21*tmp_qloop_9;
+                const real_t tmp_qloop_23 = tmp_qloop_17*tmp_qloop_8;
+                const real_t tmp_qloop_24 = -tmp_qloop_13*tmp_qloop_23 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_5*1.0;
+                const real_t tmp_qloop_25 = tmp_qloop_13*tmp_qloop_18 + tmp_qloop_19*tmp_qloop_20*tmp_qloop_4*tmp_qloop_8*1.0;
+                const real_t tmp_qloop_26 = tmp_qloop_1*tmp_qloop_23 - tmp_qloop_21*tmp_qloop_4*tmp_qloop_8;
+                const real_t tmp_qloop_27 = 1.0 / (tmp_qloop_22*tmp_qloop_24 + tmp_qloop_25*tmp_qloop_26);
+                const real_t tmp_qloop_28 = tmp_qloop_22*tmp_qloop_27;
+                const real_t tmp_qloop_29 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q];
+                const real_t tmp_qloop_30 = tmp_qloop_27*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q];
+                const real_t tmp_qloop_31 = tmp_qloop_25*tmp_qloop_30;
+                const real_t tmp_qloop_32 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q];
+                const real_t tmp_qloop_33 = tmp_qloop_25*tmp_qloop_27;
+                const real_t tmp_qloop_34 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q];
+                const real_t tmp_qloop_35 = tmp_qloop_32*0.66666666666666667 + tmp_qloop_34*0.66666666666666667;
+                const real_t tmp_qloop_36 = tmp_qloop_24*tmp_qloop_27;
+                const real_t tmp_qloop_37 = -tmp_qloop_26*tmp_qloop_27;
+                const real_t tmp_qloop_38 = (tmp_qloop_36*1.0*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q] + tmp_qloop_37*1.0*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q])*(tmp_qloop_36*2.0*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q] + tmp_qloop_37*2.0*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q]);
+                const real_t tmp_qloop_39 = tmp_qloop_32*1.0 + tmp_qloop_34*1.0;
+                const real_t tmp_qloop_40 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_0_BLUE[q] + tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_0_BLUE[q];
+                const real_t tmp_qloop_41 = tmp_qloop_36*0.5;
+                const real_t tmp_qloop_42 = tmp_qloop_37*0.5;
+                const real_t tmp_qloop_43 = tmp_qloop_28*0.5*_data_jac_affine_inv_T_grad_psi_1_0_BLUE[q] + tmp_qloop_33*0.5*_data_jac_affine_inv_T_grad_psi_0_0_BLUE[q];
+                const real_t tmp_qloop_44 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q] + tmp_qloop_43*2.0;
+                const real_t tmp_qloop_45 = -p_affine_0_0 + tmp_qloop_2*_data_q_p_0[q] + tmp_qloop_3*_data_q_p_1[q];
+                const real_t tmp_qloop_46 = (tmp_qloop_45*tmp_qloop_45);
+                const real_t tmp_qloop_47 = -p_affine_0_1 + tmp_qloop_6*_data_q_p_0[q] + tmp_qloop_7*_data_q_p_1[q];
+                const real_t tmp_qloop_48 = (tmp_qloop_47*tmp_qloop_47);
+                const real_t tmp_qloop_49 = tmp_qloop_46 + tmp_qloop_48;
+                const real_t tmp_qloop_51 = pow(tmp_qloop_49, -0.50000000000000000)*tmp_qloop_50*1.0;
+                const real_t tmp_qloop_52 = tmp_qloop_45*tmp_qloop_51;
+                const real_t tmp_qloop_53 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_45) + tmp_qloop_12*(rayVertex_1 + tmp_qloop_47);
+                const real_t tmp_qloop_54 = pow(tmp_qloop_49, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_55 = tmp_qloop_54*(radRayVertex + tmp_qloop_50*tmp_qloop_53);
+                const real_t tmp_qloop_56 = tmp_qloop_47*tmp_qloop_51;
+                const real_t tmp_qloop_57 = tmp_qloop_54*(radRayVertex + tmp_qloop_50*tmp_qloop_53);
+                const real_t tmp_qloop_58 = tmp_qloop_45*tmp_qloop_47;
+                const real_t tmp_qloop_59 = abs_det_jac_affine_BLUE*(mu_dof_0*_data_phi_0_0_BLUE[6*q] + mu_dof_1*_data_phi_0_0_BLUE[6*q + 1] + mu_dof_2*_data_phi_0_0_BLUE[6*q + 2] + mu_dof_3*_data_phi_0_0_BLUE[6*q + 3] + mu_dof_4*_data_phi_0_0_BLUE[6*q + 4] + mu_dof_5*_data_phi_0_0_BLUE[6*q + 5])*abs((tmp_qloop_0*tmp_qloop_52 - tmp_qloop_48*tmp_qloop_55)*(tmp_qloop_12*tmp_qloop_56 + tmp_qloop_46*tmp_qloop_57) - (tmp_qloop_0*tmp_qloop_56 + tmp_qloop_55*tmp_qloop_58)*(tmp_qloop_12*tmp_qloop_52 - tmp_qloop_57*tmp_qloop_58))*_data_q_w[q];
+                const real_t tmp_qloop_60 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1];
+                const real_t tmp_qloop_61 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1];
+                const real_t tmp_qloop_62 = tmp_qloop_60 + tmp_qloop_61;
+                const real_t tmp_qloop_63 = tmp_qloop_60*2.0 + tmp_qloop_61*2.0;
+                const real_t tmp_qloop_64 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 1] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 1] + tmp_qloop_40;
+                const real_t tmp_qloop_65 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2];
+                const real_t tmp_qloop_66 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2];
+                const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66;
+                const real_t tmp_qloop_68 = tmp_qloop_65*2.0 + tmp_qloop_66*2.0;
+                const real_t tmp_qloop_69 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 2] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 2] + tmp_qloop_40;
+                const real_t tmp_qloop_70 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3];
+                const real_t tmp_qloop_71 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3];
+                const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71;
+                const real_t tmp_qloop_73 = tmp_qloop_70*2.0 + tmp_qloop_71*2.0;
+                const real_t tmp_qloop_74 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 3] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 3] + tmp_qloop_40;
+                const real_t tmp_qloop_75 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4];
+                const real_t tmp_qloop_76 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4];
+                const real_t tmp_qloop_77 = tmp_qloop_75 + tmp_qloop_76;
+                const real_t tmp_qloop_78 = tmp_qloop_75*2.0 + tmp_qloop_76*2.0;
+                const real_t tmp_qloop_79 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 4] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 4] + tmp_qloop_40;
+                const real_t tmp_qloop_80 = tmp_qloop_28*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5];
+                const real_t tmp_qloop_81 = tmp_qloop_33*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5];
+                const real_t tmp_qloop_82 = tmp_qloop_80 + tmp_qloop_81;
+                const real_t tmp_qloop_83 = tmp_qloop_80*2.0 + tmp_qloop_81*2.0;
+                const real_t tmp_qloop_84 = tmp_qloop_36*_data_jac_affine_inv_T_grad_phi_0_1_BLUE[6*q + 5] + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q + 5] + tmp_qloop_40;
+                const real_t tmp_qloop_85 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1];
+                const real_t tmp_qloop_86 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1];
+                const real_t tmp_qloop_87 = tmp_qloop_85*0.66666666666666667 + tmp_qloop_86*0.66666666666666667;
+                const real_t tmp_qloop_88 = tmp_qloop_85*1.0 + tmp_qloop_86*1.0;
+                const real_t tmp_qloop_89 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 1] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 1] + tmp_qloop_43*2.0;
+                const real_t tmp_qloop_90 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2];
+                const real_t tmp_qloop_91 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2];
+                const real_t tmp_qloop_92 = tmp_qloop_90*0.66666666666666667 + tmp_qloop_91*0.66666666666666667;
+                const real_t tmp_qloop_93 = tmp_qloop_90*1.0 + tmp_qloop_91*1.0;
+                const real_t tmp_qloop_94 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 2] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 2] + tmp_qloop_43*2.0;
+                const real_t tmp_qloop_95 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3];
+                const real_t tmp_qloop_96 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3];
+                const real_t tmp_qloop_97 = tmp_qloop_95*0.66666666666666667 + tmp_qloop_96*0.66666666666666667;
+                const real_t tmp_qloop_98 = tmp_qloop_95*1.0 + tmp_qloop_96*1.0;
+                const real_t tmp_qloop_99 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 3] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 3] + tmp_qloop_43*2.0;
+                const real_t tmp_qloop_100 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4];
+                const real_t tmp_qloop_101 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4];
+                const real_t tmp_qloop_102 = tmp_qloop_100*0.66666666666666667 + tmp_qloop_101*0.66666666666666667;
+                const real_t tmp_qloop_103 = tmp_qloop_100*1.0 + tmp_qloop_101*1.0;
+                const real_t tmp_qloop_104 = tmp_qloop_41*2.0*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 4] + tmp_qloop_42*2.0*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 4] + tmp_qloop_43*2.0;
+                const real_t tmp_qloop_105 = tmp_qloop_28*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5];
+                const real_t tmp_qloop_106 = tmp_qloop_33*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5];
+                const real_t q_tmp_0_0 = tmp_qloop_59*(-tmp_qloop_35*(tmp_qloop_29 + tmp_qloop_31) + tmp_qloop_38 + tmp_qloop_39*(tmp_qloop_29*2.0 + tmp_qloop_31*2.0) + tmp_qloop_44*(tmp_qloop_24*tmp_qloop_30 + tmp_qloop_37*_data_jac_affine_inv_T_grad_phi_1_1_BLUE[6*q] + tmp_qloop_40));
+                const real_t q_tmp_0_1 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_62 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_63 + tmp_qloop_44*tmp_qloop_64);
+                const real_t q_tmp_0_2 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_67 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_68 + tmp_qloop_44*tmp_qloop_69);
+                const real_t q_tmp_0_3 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_72 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_73 + tmp_qloop_44*tmp_qloop_74);
+                const real_t q_tmp_0_4 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_77 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_78 + tmp_qloop_44*tmp_qloop_79);
+                const real_t q_tmp_0_5 = tmp_qloop_59*(-tmp_qloop_35*tmp_qloop_82 + tmp_qloop_38 + tmp_qloop_39*tmp_qloop_83 + tmp_qloop_44*tmp_qloop_84);
+                const real_t q_tmp_1_1 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_62*tmp_qloop_87 + tmp_qloop_63*tmp_qloop_88 + tmp_qloop_64*tmp_qloop_89);
+                const real_t q_tmp_1_2 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_67*tmp_qloop_87 + tmp_qloop_68*tmp_qloop_88 + tmp_qloop_69*tmp_qloop_89);
+                const real_t q_tmp_1_3 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_72*tmp_qloop_87 + tmp_qloop_73*tmp_qloop_88 + tmp_qloop_74*tmp_qloop_89);
+                const real_t q_tmp_1_4 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_77*tmp_qloop_87 + tmp_qloop_78*tmp_qloop_88 + tmp_qloop_79*tmp_qloop_89);
+                const real_t q_tmp_1_5 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_82*tmp_qloop_87 + tmp_qloop_83*tmp_qloop_88 + tmp_qloop_84*tmp_qloop_89);
+                const real_t q_tmp_2_2 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_67*tmp_qloop_92 + tmp_qloop_68*tmp_qloop_93 + tmp_qloop_69*tmp_qloop_94);
+                const real_t q_tmp_2_3 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_72*tmp_qloop_92 + tmp_qloop_73*tmp_qloop_93 + tmp_qloop_74*tmp_qloop_94);
+                const real_t q_tmp_2_4 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_77*tmp_qloop_92 + tmp_qloop_78*tmp_qloop_93 + tmp_qloop_79*tmp_qloop_94);
+                const real_t q_tmp_2_5 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_82*tmp_qloop_92 + tmp_qloop_83*tmp_qloop_93 + tmp_qloop_84*tmp_qloop_94);
+                const real_t q_tmp_3_3 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_72*tmp_qloop_97 + tmp_qloop_73*tmp_qloop_98 + tmp_qloop_74*tmp_qloop_99);
+                const real_t q_tmp_3_4 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_77*tmp_qloop_97 + tmp_qloop_78*tmp_qloop_98 + tmp_qloop_79*tmp_qloop_99);
+                const real_t q_tmp_3_5 = tmp_qloop_59*(tmp_qloop_38 - tmp_qloop_82*tmp_qloop_97 + tmp_qloop_83*tmp_qloop_98 + tmp_qloop_84*tmp_qloop_99);
+                const real_t q_tmp_4_4 = tmp_qloop_59*(-tmp_qloop_102*tmp_qloop_77 + tmp_qloop_103*tmp_qloop_78 + tmp_qloop_104*tmp_qloop_79 + tmp_qloop_38);
+                const real_t q_tmp_4_5 = tmp_qloop_59*(-tmp_qloop_102*tmp_qloop_82 + tmp_qloop_103*tmp_qloop_83 + tmp_qloop_104*tmp_qloop_84 + tmp_qloop_38);
+                const real_t q_tmp_5_5 = tmp_qloop_59*(tmp_qloop_38 + tmp_qloop_82*(tmp_qloop_105 + tmp_qloop_106)*-0.66666666666666667 + tmp_qloop_83*(tmp_qloop_105*1.0 + tmp_qloop_106*1.0) + tmp_qloop_84*(tmp_qloop_41*_data_jac_affine_inv_T_grad_psi_0_1_BLUE[6*q + 5] + tmp_qloop_42*_data_jac_affine_inv_T_grad_psi_1_1_BLUE[6*q + 5] + tmp_qloop_43)*2.0);
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_0_3 = q_acc_0_3 + q_tmp_0_3;
+                q_acc_0_4 = q_acc_0_4 + q_tmp_0_4;
+                q_acc_0_5 = q_acc_0_5 + q_tmp_0_5;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_1_3 = q_acc_1_3 + q_tmp_1_3;
+                q_acc_1_4 = q_acc_1_4 + q_tmp_1_4;
+                q_acc_1_5 = q_acc_1_5 + q_tmp_1_5;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                q_acc_2_3 = q_acc_2_3 + q_tmp_2_3;
+                q_acc_2_4 = q_acc_2_4 + q_tmp_2_4;
+                q_acc_2_5 = q_acc_2_5 + q_tmp_2_5;
+                q_acc_3_3 = q_acc_3_3 + q_tmp_3_3;
+                q_acc_3_4 = q_acc_3_4 + q_tmp_3_4;
+                q_acc_3_5 = q_acc_3_5 + q_tmp_3_5;
+                q_acc_4_4 = q_acc_4_4 + q_tmp_4_4;
+                q_acc_4_5 = q_acc_4_5 + q_tmp_4_5;
+                q_acc_5_5 = q_acc_5_5 + q_tmp_5_5;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_0_3 = q_acc_0_3;
+             const real_t elMat_0_4 = q_acc_0_4;
+             const real_t elMat_0_5 = q_acc_0_5;
+             const real_t elMat_1_0 = q_acc_0_1;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_1_3 = q_acc_1_3;
+             const real_t elMat_1_4 = q_acc_1_4;
+             const real_t elMat_1_5 = q_acc_1_5;
+             const real_t elMat_2_0 = q_acc_0_2;
+             const real_t elMat_2_1 = q_acc_1_2;
+             const real_t elMat_2_2 = q_acc_2_2;
+             const real_t elMat_2_3 = q_acc_2_3;
+             const real_t elMat_2_4 = q_acc_2_4;
+             const real_t elMat_2_5 = q_acc_2_5;
+             const real_t elMat_3_0 = q_acc_0_3;
+             const real_t elMat_3_1 = q_acc_1_3;
+             const real_t elMat_3_2 = q_acc_2_3;
+             const real_t elMat_3_3 = q_acc_3_3;
+             const real_t elMat_3_4 = q_acc_3_4;
+             const real_t elMat_3_5 = q_acc_3_5;
+             const real_t elMat_4_0 = q_acc_0_4;
+             const real_t elMat_4_1 = q_acc_1_4;
+             const real_t elMat_4_2 = q_acc_2_4;
+             const real_t elMat_4_3 = q_acc_3_4;
+             const real_t elMat_4_4 = q_acc_4_4;
+             const real_t elMat_4_5 = q_acc_4_5;
+             const real_t elMat_5_0 = q_acc_0_5;
+             const real_t elMat_5_1 = q_acc_1_5;
+             const real_t elMat_5_2 = q_acc_2_5;
+             const real_t elMat_5_3 = q_acc_3_5;
+             const real_t elMat_5_4 = q_acc_4_5;
+             const real_t elMat_5_5 = q_acc_5_5;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 6 );
+             std::vector< real_t > _data_mat( 36 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_0_3));
+             _data_mat[4] = ((real_t)(elMat_0_4));
+             _data_mat[5] = ((real_t)(elMat_0_5));
+             _data_mat[6] = ((real_t)(elMat_1_0));
+             _data_mat[7] = ((real_t)(elMat_1_1));
+             _data_mat[8] = ((real_t)(elMat_1_2));
+             _data_mat[9] = ((real_t)(elMat_1_3));
+             _data_mat[10] = ((real_t)(elMat_1_4));
+             _data_mat[11] = ((real_t)(elMat_1_5));
+             _data_mat[12] = ((real_t)(elMat_2_0));
+             _data_mat[13] = ((real_t)(elMat_2_1));
+             _data_mat[14] = ((real_t)(elMat_2_2));
+             _data_mat[15] = ((real_t)(elMat_2_3));
+             _data_mat[16] = ((real_t)(elMat_2_4));
+             _data_mat[17] = ((real_t)(elMat_2_5));
+             _data_mat[18] = ((real_t)(elMat_3_0));
+             _data_mat[19] = ((real_t)(elMat_3_1));
+             _data_mat[20] = ((real_t)(elMat_3_2));
+             _data_mat[21] = ((real_t)(elMat_3_3));
+             _data_mat[22] = ((real_t)(elMat_3_4));
+             _data_mat[23] = ((real_t)(elMat_3_5));
+             _data_mat[24] = ((real_t)(elMat_4_0));
+             _data_mat[25] = ((real_t)(elMat_4_1));
+             _data_mat[26] = ((real_t)(elMat_4_2));
+             _data_mat[27] = ((real_t)(elMat_4_3));
+             _data_mat[28] = ((real_t)(elMat_4_4));
+             _data_mat[29] = ((real_t)(elMat_4_5));
+             _data_mat[30] = ((real_t)(elMat_5_0));
+             _data_mat[31] = ((real_t)(elMat_5_1));
+             _data_mat[32] = ((real_t)(elMat_5_2));
+             _data_mat[33] = ((real_t)(elMat_5_3));
+             _data_mat[34] = ((real_t)(elMat_5_4));
+             _data_mat[35] = ((real_t)(elMat_5_5));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/gradient/CMakeLists.txt b/operators/gradient/CMakeLists.txt
index 42be7a6cfb40096189573006c756aeee8188384a..0c37a89aa82b7918a363ba6ece752e02096733c6 100644
--- a/operators/gradient/CMakeLists.txt
+++ b/operators/gradient/CMakeLists.txt
@@ -1,5 +1,9 @@
 add_library( opgen-gradient
 
+   P1ToP2ElementwiseGradientAnnulusMap_0_0.cpp
+   P1ToP2ElementwiseGradientAnnulusMap_0_0.hpp
+   P1ToP2ElementwiseGradientAnnulusMap_1_0.cpp
+   P1ToP2ElementwiseGradientAnnulusMap_1_0.hpp
    P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.cpp
    P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.hpp
    P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.cpp
@@ -17,6 +21,8 @@ add_library( opgen-gradient
 if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
    target_sources(opgen-gradient PRIVATE
 
+      avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp
+      avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp
       avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp
       avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp
       avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp
@@ -25,6 +31,8 @@ if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
       avx/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp
       avx/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp
       avx/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp
+      noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_macro_2D.cpp
+      noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_macro_2D.cpp
       noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
       noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp
       noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp
@@ -37,6 +45,8 @@ if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
 
    set_source_files_properties(
 
+      avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp
+      avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp
       avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp
       avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp
       avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp
@@ -55,6 +65,10 @@ else()
 
    target_sources(opgen-gradient PRIVATE
 
+      noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp
+      noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_macro_2D.cpp
+      noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp
+      noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_macro_2D.cpp
       noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp
       noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp
       noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp
diff --git a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.cpp b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..951ebb4290edb89864d62c26248ab852f7aa984d
--- /dev/null
+++ b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.cpp
@@ -0,0 +1,276 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (maybe
+// (related to) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) causing a
+// warning in an internal standard library header (bits/stl_algobase.h). As a
+// workaround, we disable the warning and include this header indirectly through
+// a public header.
+#include <waLBerlaDefinitions.h>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnonnull"
+#endif
+#include <cmath>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#include "P1ToP2ElementwiseGradientAnnulusMap_0_0.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+P1ToP2ElementwiseGradientAnnulusMap_0_0::P1ToP2ElementwiseGradientAnnulusMap_0_0(
+    const std::shared_ptr< PrimitiveStorage >& storage,
+    size_t                                     minLevel,
+    size_t                                     maxLevel )
+: Operator( storage, minLevel, maxLevel ) // all three arguments are forwarded unchanged to the Operator base class
+{} // empty body: nothing is set up beyond the base-class initialisation
+
+void P1ToP2ElementwiseGradientAnnulusMap_0_0::apply( const P1Function< real_t >& src, // function the operator is applied to
+                                                     const P2Function< real_t >& dst, // function whose vertex and edge DoF data receive the result
+                                                     uint_t                      level, // level whose data arrays are read and written
+                                                     DoFType                     flag, // DoF flags used for zeroing dst and for the additive communication
+                                                     UpdateType                  updateType ) const // Replace zeroes the flagged part of dst before the kernel runs
+{
+   this->startTiming( "apply" ); // matched by stopTiming( "apply" ) at the end of this function
+
+   // Make sure that the halos of src are up-to-date before the kernel reads them
+   this->timingTree_->start( "pre-communication" );
+   if ( this->storage_->hasGlobalCells() ) // storages with cells are not supported; only the face (2D) path exists
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH );
+   }
+   this->timingTree_->stop( "pre-communication" );
+
+   if ( updateType == Replace )
+   {
+      // We need to zero the destination array (including halos).
+      // However, we must not zero out anything that is not flagged with the specified BCs.
+      // Therefore, we first zero out everything that is flagged, and then, later,
+      // the halos of the highest-dimensional primitives (done per face in the loop below).
+      dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag );
+   }
+
+   if ( storage_->hasGlobalCells() ) // same guard as above: there is no kernel for storages with cells
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      for ( auto& it : storage_->getFaces() ) // the kernel is invoked once per face of the storage
+      {
+         Face& face = *it.second;
+
+         // Raw pointers to the face-local data of src and of the vertex and edge DoF parts of dst
+         real_t* _data_src       = face.getData( src.getFaceDataID() )->getPointer( level );
+         real_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         // Zero out dst halos only: vertex DoFs on the face boundary and edge DoFs that are not inner.
+         //
+         // This is also necessary when using update type == Add.
+         // During additive comm we then skip zeroing the data on the lower-dim primitives.
+         for ( const auto& idx : vertexdof::macroface::Iterator( level ) )
+         {
+            if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) )
+            {
+               auto arrayIdx             = vertexdof::macroface::index( level, idx.x(), idx.y() );
+               _data_dstVertex[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+            }
+         }
+         for ( const auto& idx : edgedof::macroface::Iterator( level ) )
+         {
+            for ( const auto& orientation : edgedof::faceLocalEdgeDoFOrientations ) // every edge orientation is checked separately
+            {
+               if ( !edgedof::macroface::isInnerEdgeDoF( level, idx, orientation ) )
+               {
+                  auto arrayIdx           = edgedof::macroface::index( level, idx.x(), idx.y(), orientation );
+                  _data_dstEdge[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+               }
+            }
+         }
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level ); // same value is passed as integer ...
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); // ... and as real_t to the kernel
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0]; // components 0 and 1 of the three face vertices
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         WALBERLA_CHECK_NOT_NULLPTR(
+             std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ),
+             "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." ) // NOTE(review): text says macro-cell, but the map checked here belongs to a face
+         real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex(); // AnnulusMap parameters of this face, forwarded to the kernel
+         real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex();
+         real_t refVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0];
+         real_t rayVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0];
+         real_t thrVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0];
+         real_t refVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1];
+         real_t rayVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1];
+         real_t thrVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" ); // the timer covers only the generated kernel call below
+
+         apply_macro_2D(
+
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_src,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+
+      // Push the result from the faces to the lower-dimensional primitives (edges and vertices)
+      // NOTE(review): DoFType::All ^ flag presumably selects the DoFs excluded by flag - confirm against communicateAdditively
+      this->timingTree_->start( "post-communication" );
+      // Note: We could avoid communication here by implementing the apply() also for the respective
+      //       lower dimensional primitives!
+      dst.getVertexDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getVertexDoFFunction().communicateAdditively< Face, Vertex >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getEdgeDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      this->timingTree_->stop( "post-communication" );
+   }
+
+   this->stopTiming( "apply" );
+}
+void P1ToP2ElementwiseGradientAnnulusMap_0_0::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat, // matrix proxy handed on to the kernel
+                                                        const P1Function< idx_t >&                  src, // idx_t-valued function on the source (P1) side
+                                                        const P2Function< idx_t >&                  dst, // idx_t-valued function on the destination (P2) side
+                                                        uint_t                                      level, // level whose data arrays are read
+                                                        DoFType                                     flag ) const // ignored, see the warning below
+{
+   this->startTiming( "toMatrix" ); // matched by stopTiming( "toMatrix" ) at the end of this function
+
+   // The provided flag is currently ignored; a warning is logged on root if it differs from All.
+   if ( flag != All )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" );
+   }
+
+   if ( storage_->hasGlobalCells() ) // storages with cells are not supported; only the face (2D) path exists
+   {
+      this->timingTree_->start( "pre-communication" ); // timer section is opened and closed immediately; nothing is communicated
+
+      this->timingTree_->stop( "pre-communication" );
+
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      this->timingTree_->start( "pre-communication" ); // timer section is opened and closed immediately; nothing is communicated
+
+      this->timingTree_->stop( "pre-communication" );
+
+      for ( auto& it : storage_->getFaces() ) // the kernel is invoked once per face of the storage
+      {
+         Face& face = *it.second;
+
+         // Raw pointers to the idx_t data of src and dst on this face; NOTE(review): presumably DoF indices used as matrix columns/rows - confirm in the kernel
+         idx_t* _data_src       = face.getData( src.getFaceDataID() )->getPointer( level );
+         idx_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t* _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level ); // same value is passed as integer ...
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); // ... and as real_t to the kernel
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0]; // components 0 and 1 of the three face vertices
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+         WALBERLA_CHECK_NOT_NULLPTR(
+             std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ),
+             "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." ) // NOTE(review): text says macro-cell, but the map checked here belongs to a face
+         real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex(); // AnnulusMap parameters of this face, forwarded to the kernel
+         real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex();
+         real_t refVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0];
+         real_t rayVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0];
+         real_t thrVertex_0  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0];
+         real_t refVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1];
+         real_t rayVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1];
+         real_t thrVertex_1  = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" ); // the timer covers only the generated kernel call below
+
+         toMatrix_macro_2D(
+
+             _data_dstEdge,
+             _data_dstVertex,
+             _data_src,
+             macro_vertex_coord_id_0comp0,
+             macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0,
+             macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0,
+             macro_vertex_coord_id_2comp1,
+             mat,
+             micro_edges_per_macro_edge,
+             micro_edges_per_macro_edge_float,
+             radRayVertex,
+             radRefVertex,
+             rayVertex_0,
+             rayVertex_1,
+             refVertex_0,
+             refVertex_1,
+             thrVertex_0,
+             thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+   }
+   this->stopTiming( "toMatrix" );
+}
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.hpp b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..04e2a71ad535473c68e99457ca7bc39f46ab0fb7
--- /dev/null
+++ b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.hpp
@@ -0,0 +1,135 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+#include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/communication/Syncing.hpp"
+#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
+#include "hyteg/geometry/AnnulusMap.hpp"
+#include "hyteg/operators/Operator.hpp"
+#include "hyteg/p1functionspace/P1Function.hpp"
+#include "hyteg/p2functionspace/P2Function.hpp"
+#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
+#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+/// Gradient.
+///
+/// Component:    0
+/// Geometry map: AnnulusMap
+///
+/// Weak formulation
+///
+///     u: trial function (scalar space:    Lagrange, degree: 1)
+///     v: test function  (vectorial space: Lagrange, degree: 2)
+///
+///     ∫ - ( ∇ · v ) u
+
+class P1ToP2ElementwiseGradientAnnulusMap_0_0 : public Operator< P1Function< real_t >, P2Function< real_t > >
+{
+ public:
+   P1ToP2ElementwiseGradientAnnulusMap_0_0( const std::shared_ptr< PrimitiveStorage >& storage,
+                                            size_t                                     minLevel,
+                                            size_t                                     maxLevel ); // defined outside this header
+   /// Operator application for a P1 source and a P2 destination on one refinement level; updateType defaults to Replace.
+   void apply( const P1Function< real_t >& src,
+               const P2Function< real_t >& dst, // NOTE(review): const, yet presumably the output function - confirm
+               uint_t                      level,
+               DoFType                     flag,
+               UpdateType                  updateType = Replace ) const;
+   /// Takes a SparseMatrixProxy plus idx_t-valued src/dst functions; presumably the matrix-assembly counterpart of apply().
+   void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                  const P1Function< idx_t >&                  src,
+                  const P2Function< idx_t >&                  dst,
+                  uint_t                                      level,
+                  DoFType                                     flag ) const;
+   // The kernels below are only declared here; apply_macro_2D is defined in avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp.
+ protected:
+ private:
+   /// Kernel type: apply
+   /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    220     330      17      12      3              0                 0              1
+   void apply_macro_2D( real_t* RESTRICT _data_dstEdge,
+                        real_t* RESTRICT _data_dstVertex,
+                        real_t* RESTRICT _data_src,
+                        real_t           macro_vertex_coord_id_0comp0, // macro vertex 0, component 0; the five below follow the same naming
+                        real_t           macro_vertex_coord_id_0comp1,
+                        real_t           macro_vertex_coord_id_1comp0,
+                        real_t           macro_vertex_coord_id_1comp1,
+                        real_t           macro_vertex_coord_id_2comp0,
+                        real_t           macro_vertex_coord_id_2comp1,
+                        int64_t          micro_edges_per_macro_edge, // bound of the kernel's element loops
+                        real_t           micro_edges_per_macro_edge_float, // same quantity as real_t; the kernel divides by it
+                        real_t           radRayVertex, // from here on: geometry-map parameters, presumably the AnnulusMap accessors of the same name
+                        real_t           radRefVertex,
+                        real_t           rayVertex_0,
+                        real_t           rayVertex_1,
+                        real_t           refVertex_0,
+                        real_t           refVertex_1,
+                        real_t           thrVertex_0,
+                        real_t           thrVertex_1 ) const;
+   /// Kernel type: toMatrix
+   /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    202     312      17      12      3              0                 0              4
+   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge,
+                           idx_t* RESTRICT                      _data_dstVertex,
+                           idx_t* RESTRICT                      _data_src,
+                           real_t                               macro_vertex_coord_id_0comp0, // coordinates, counts and map parameters are named as in apply_macro_2D
+                           real_t                               macro_vertex_coord_id_0comp1,
+                           real_t                               macro_vertex_coord_id_1comp0,
+                           real_t                               macro_vertex_coord_id_1comp1,
+                           real_t                               macro_vertex_coord_id_2comp0,
+                           real_t                               macro_vertex_coord_id_2comp1,
+                           std::shared_ptr< SparseMatrixProxy > mat, // shared_ptr taken by value
+                           int64_t                              micro_edges_per_macro_edge,
+                           real_t                               micro_edges_per_macro_edge_float,
+                           real_t                               radRayVertex,
+                           real_t                               radRefVertex,
+                           real_t                               rayVertex_0,
+                           real_t                               rayVertex_1,
+                           real_t                               refVertex_0,
+                           real_t                               refVertex_1,
+                           real_t                               thrVertex_0,
+                           real_t                               thrVertex_1 ) const;
+};
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.cpp b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a84cd2a86460310283c2c8a6b2d4e65739ec5ce5
--- /dev/null
+++ b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.cpp
@@ -0,0 +1,276 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (maybe
+// (related to) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) causing a
+// warning in an internal standard library header (bits/stl_algobase.h). As a
+// workaround, we disable the warning and include this header indirectly through
+// a public header.
+#include <waLBerlaDefinitions.h>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnonnull"
+#endif
+#include <cmath>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#include "P1ToP2ElementwiseGradientAnnulusMap_1_0.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+/// Constructs the operator; storage, minLevel and maxLevel are forwarded unchanged to the Operator base class.
+P1ToP2ElementwiseGradientAnnulusMap_1_0::P1ToP2ElementwiseGradientAnnulusMap_1_0(
+    const std::shared_ptr< PrimitiveStorage >& storage, size_t minLevel, size_t maxLevel )
+: Operator< P1Function< real_t >, P2Function< real_t > >( storage, minLevel, maxLevel )
+{
+}
+
+/// Applies the operator to src and writes the result into dst on refinement level `level`.
+///
+/// Only 2D storages are handled; the call aborts with "Not implemented." if the storage has global cells.
+/// For every macro-face the generated kernel apply_macro_2D() is invoked on the raw face data of src and dst.
+///
+/// \param src        P1 input function; synced between primitives (LOW2HIGH) before the kernel runs
+/// \param dst        P2 output function; declared const, yet its data is modified
+/// \param level      refinement level to operate on
+/// \param flag       passed to dst.interpolate() when zeroing, and as DoFType::All ^ flag to the additive communication
+/// \param updateType Replace zeroes the flagged DoFs of dst first; `updateType == Replace` is also passed to the communication
+void P1ToP2ElementwiseGradientAnnulusMap_1_0::apply( const P1Function< real_t >& src,
+                                                     const P2Function< real_t >& dst,
+                                                     uint_t                      level,
+                                                     DoFType                     flag,
+                                                     UpdateType                  updateType ) const
+{
+   this->startTiming( "apply" );
+
+   // Make sure that halos are up-to-date
+   this->timingTree_->start( "pre-communication" );
+   if ( this->storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH );
+   }
+   this->timingTree_->stop( "pre-communication" );
+
+   if ( updateType == Replace )
+   {
+      // We need to zero the destination array (including halos).
+      // However, we must not zero out anything that is not flagged with the specified BCs.
+      // Therefore, we first zero out everything that is flagged, and then, later,
+      // the halos of the highest dim primitives.
+      dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      // These quantities depend on the level only, so they are computed once instead of once per face.
+      const auto micro_edges_per_macro_edge       = static_cast< int64_t >( levelinfo::num_microedges_per_edge( level ) );
+      const auto micro_edges_per_macro_edge_float = static_cast< real_t >( levelinfo::num_microedges_per_edge( level ) );
+
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data in the functions
+         real_t* _data_src       = face.getData( src.getFaceDataID() )->getPointer( level );
+         real_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         // Zero out dst halos only
+         //
+         // This is also necessary when using update type == Add.
+         // During additive comm we then skip zeroing the data on the lower-dim primitives.
+         for ( const auto& idx : vertexdof::macroface::Iterator( level ) )
+         {
+            if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) )
+            {
+               auto arrayIdx             = vertexdof::macroface::index( level, idx.x(), idx.y() );
+               _data_dstVertex[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+            }
+         }
+         for ( const auto& idx : edgedof::macroface::Iterator( level ) )
+         {
+            for ( const auto& orientation : edgedof::faceLocalEdgeDoFOrientations )
+            {
+               if ( !edgedof::macroface::isInnerEdgeDoF( level, idx, orientation ) )
+               {
+                  auto arrayIdx           = edgedof::macroface::index( level, idx.x(), idx.y(), orientation );
+                  _data_dstEdge[arrayIdx] = walberla::numeric_cast< real_t >( 0 );
+               }
+            }
+         }
+
+         const auto&  coords                       = face.getCoordinates();
+         const real_t macro_vertex_coord_id_0comp0 = static_cast< real_t >( coords[0][0] );
+         const real_t macro_vertex_coord_id_0comp1 = static_cast< real_t >( coords[0][1] );
+         const real_t macro_vertex_coord_id_1comp0 = static_cast< real_t >( coords[1][0] );
+         const real_t macro_vertex_coord_id_1comp1 = static_cast< real_t >( coords[1][1] );
+         const real_t macro_vertex_coord_id_2comp0 = static_cast< real_t >( coords[2][0] );
+         const real_t macro_vertex_coord_id_2comp1 = static_cast< real_t >( coords[2][1] );
+
+         // Cast the geometry map once and reuse the pointer instead of repeating the dynamic cast for every accessor.
+         const auto annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+         WALBERLA_CHECK_NOT_NULLPTR(
+             annulusMap,
+             "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         const real_t radRefVertex = annulusMap->radRefVertex();
+         const real_t radRayVertex = annulusMap->radRayVertex();
+         const real_t refVertex_0  = annulusMap->refVertex()[0];
+         const real_t rayVertex_0  = annulusMap->rayVertex()[0];
+         const real_t thrVertex_0  = annulusMap->thrVertex()[0];
+         const real_t refVertex_1  = annulusMap->refVertex()[1];
+         const real_t rayVertex_1  = annulusMap->rayVertex()[1];
+         const real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+         apply_macro_2D( _data_dstEdge, _data_dstVertex, _data_src,
+                         macro_vertex_coord_id_0comp0, macro_vertex_coord_id_0comp1,
+                         macro_vertex_coord_id_1comp0, macro_vertex_coord_id_1comp1,
+                         macro_vertex_coord_id_2comp0, macro_vertex_coord_id_2comp1,
+                         micro_edges_per_macro_edge, micro_edges_per_macro_edge_float,
+                         radRayVertex, radRefVertex,
+                         rayVertex_0, rayVertex_1, refVertex_0, refVertex_1, thrVertex_0, thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+
+      // Push result to lower-dimensional primitives
+      this->timingTree_->start( "post-communication" );
+      // Note: We could avoid communication here by implementing the apply() also for the respective
+      //       lower dimensional primitives!
+      dst.getVertexDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getVertexDoFFunction().communicateAdditively< Face, Vertex >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getEdgeDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      this->timingTree_->stop( "post-communication" );
+   }
+
+   this->stopTiming( "apply" );
+}
+/// Matrix assembly: for every macro-face the generated kernel toMatrix_macro_2D() is run with `mat`.
+///
+/// Only 2D storages are handled; the call aborts with "Not implemented." if the storage has global cells.
+/// The kernel receives the raw face data of the index functions src and dst on refinement level `level`.
+///
+/// \param mat   sparse matrix proxy handed to the kernel
+/// \param src   P1 index function; its face data is passed to the kernel
+/// \param dst   P2 index function; its vertex-DoF and edge-DoF face data are passed to the kernel
+/// \param level refinement level to operate on
+/// \param flag  ignored; a warning is logged on root if it is not All
+void P1ToP2ElementwiseGradientAnnulusMap_1_0::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                                                        const P1Function< idx_t >&                  src,
+                                                        const P2Function< idx_t >&                  dst,
+                                                        uint_t                                      level,
+                                                        DoFType                                     flag ) const
+{
+   this->startTiming( "toMatrix" );
+
+   // We currently ignore the flag provided!
+   if ( flag != All )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" );
+   }
+
+   // No communication is performed in either branch; the "pre-communication" timer is merely started and
+   // stopped (presumably so that the entry exists in the timing tree - confirm against the generator).
+   if ( storage_->hasGlobalCells() )
+   {
+      this->timingTree_->start( "pre-communication" );
+      this->timingTree_->stop( "pre-communication" );
+
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      this->timingTree_->start( "pre-communication" );
+      this->timingTree_->stop( "pre-communication" );
+
+      // These quantities depend on the level only, so they are computed once instead of once per face.
+      const auto micro_edges_per_macro_edge       = static_cast< int64_t >( levelinfo::num_microedges_per_edge( level ) );
+      const auto micro_edges_per_macro_edge_float = static_cast< real_t >( levelinfo::num_microedges_per_edge( level ) );
+
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data
+         idx_t* _data_src       = face.getData( src.getFaceDataID() )->getPointer( level );
+         idx_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t* _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         const auto&  coords                       = face.getCoordinates();
+         const real_t macro_vertex_coord_id_0comp0 = static_cast< real_t >( coords[0][0] );
+         const real_t macro_vertex_coord_id_0comp1 = static_cast< real_t >( coords[0][1] );
+         const real_t macro_vertex_coord_id_1comp0 = static_cast< real_t >( coords[1][0] );
+         const real_t macro_vertex_coord_id_1comp1 = static_cast< real_t >( coords[1][1] );
+         const real_t macro_vertex_coord_id_2comp0 = static_cast< real_t >( coords[2][0] );
+         const real_t macro_vertex_coord_id_2comp1 = static_cast< real_t >( coords[2][1] );
+
+         // Cast the geometry map once and reuse the pointer instead of repeating the dynamic cast for every accessor.
+         const auto annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+         WALBERLA_CHECK_NOT_NULLPTR(
+             annulusMap,
+             "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         const real_t radRefVertex = annulusMap->radRefVertex();
+         const real_t radRayVertex = annulusMap->radRayVertex();
+         const real_t refVertex_0  = annulusMap->refVertex()[0];
+         const real_t rayVertex_0  = annulusMap->rayVertex()[0];
+         const real_t thrVertex_0  = annulusMap->thrVertex()[0];
+         const real_t refVertex_1  = annulusMap->refVertex()[1];
+         const real_t rayVertex_1  = annulusMap->rayVertex()[1];
+         const real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+         toMatrix_macro_2D( _data_dstEdge, _data_dstVertex, _data_src,
+                            macro_vertex_coord_id_0comp0, macro_vertex_coord_id_0comp1,
+                            macro_vertex_coord_id_1comp0, macro_vertex_coord_id_1comp1,
+                            macro_vertex_coord_id_2comp0, macro_vertex_coord_id_2comp1,
+                            mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float,
+                            radRayVertex, radRefVertex,
+                            rayVertex_0, rayVertex_1, refVertex_0, refVertex_1, thrVertex_0, thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+   }
+   this->stopTiming( "toMatrix" );
+}
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.hpp b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..25d1b2e1ace231eb4d10d33ab62186ad82154828
--- /dev/null
+++ b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.hpp
@@ -0,0 +1,135 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+#include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/communication/Syncing.hpp"
+#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
+#include "hyteg/geometry/AnnulusMap.hpp"
+#include "hyteg/operators/Operator.hpp"
+#include "hyteg/p1functionspace/P1Function.hpp"
+#include "hyteg/p2functionspace/P2Function.hpp"
+#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
+#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+/// Gradient.
+///
+/// Component:    1
+/// Geometry map: AnnulusMap
+///
+/// Weak formulation
+///
+///     u: trial function (scalar space:    Lagrange, degree: 1)
+///     v: test function  (vectorial space: Lagrange, degree: 2)
+///
+///     ∫ - ( ∇ · v ) u
+
+class P1ToP2ElementwiseGradientAnnulusMap_1_0 : public Operator< P1Function< real_t >, P2Function< real_t > >
+{
+ public:
+   P1ToP2ElementwiseGradientAnnulusMap_1_0( const std::shared_ptr< PrimitiveStorage >& storage,
+                                            size_t                                     minLevel,
+                                            size_t                                     maxLevel ); // all three are forwarded to the Operator base class
+   /// Runs apply_macro_2D() on every macro-face, then communicates additively; aborts if the storage has global cells.
+   void apply( const P1Function< real_t >& src,
+               const P2Function< real_t >& dst, // declared const, yet its data is modified
+               uint_t                      level,
+               DoFType                     flag,
+               UpdateType                  updateType = Replace ) const; // Replace: flagged DoFs of dst are zeroed first
+   /// Runs toMatrix_macro_2D() on every macro-face; flag is ignored (a warning is logged on root if it is not All).
+   void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                  const P1Function< idx_t >&                  src,
+                  const P2Function< idx_t >&                  dst,
+                  uint_t                                      level,
+                  DoFType                                     flag ) const;
+   // The kernels below are only declared here; they are not defined in P1ToP2ElementwiseGradientAnnulusMap_1_0.cpp.
+ protected:
+ private:
+   /// Kernel type: apply
+   /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    220     330      17      12      3              0                 0              1
+   void apply_macro_2D( real_t* RESTRICT _data_dstEdge, // face data of dst.getEdgeDoFFunction()
+                        real_t* RESTRICT _data_dstVertex, // face data of dst.getVertexDoFFunction()
+                        real_t* RESTRICT _data_src, // face data of src
+                        real_t           macro_vertex_coord_id_0comp0, // face.getCoordinates()[0][0]; the five below follow the same naming
+                        real_t           macro_vertex_coord_id_0comp1,
+                        real_t           macro_vertex_coord_id_1comp0,
+                        real_t           macro_vertex_coord_id_1comp1,
+                        real_t           macro_vertex_coord_id_2comp0,
+                        real_t           macro_vertex_coord_id_2comp1,
+                        int64_t          micro_edges_per_macro_edge, // levelinfo::num_microedges_per_edge( level )
+                        real_t           micro_edges_per_macro_edge_float, // the same value cast to real_t
+                        real_t           radRayVertex, // from here on: AnnulusMap accessors of the same name ( _0 / _1 = component )
+                        real_t           radRefVertex,
+                        real_t           rayVertex_0,
+                        real_t           rayVertex_1,
+                        real_t           refVertex_0,
+                        real_t           refVertex_1,
+                        real_t           thrVertex_0,
+                        real_t           thrVertex_1 ) const;
+   /// Kernel type: toMatrix
+   /// - quadrature rule: Dunavant 2 | points: 3, degree: 2
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    202     312      17      12      3              0                 0              4
+   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dstEdge, // data arrays as in apply_macro_2D, but of the idx_t functions
+                           idx_t* RESTRICT                      _data_dstVertex,
+                           idx_t* RESTRICT                      _data_src,
+                           real_t                               macro_vertex_coord_id_0comp0, // coordinates, counts and map parameters as in apply_macro_2D
+                           real_t                               macro_vertex_coord_id_0comp1,
+                           real_t                               macro_vertex_coord_id_1comp0,
+                           real_t                               macro_vertex_coord_id_1comp1,
+                           real_t                               macro_vertex_coord_id_2comp0,
+                           real_t                               macro_vertex_coord_id_2comp1,
+                           std::shared_ptr< SparseMatrixProxy > mat, // the proxy given to toMatrix(); shared_ptr taken by value
+                           int64_t                              micro_edges_per_macro_edge,
+                           real_t                               micro_edges_per_macro_edge_float,
+                           real_t                               radRayVertex,
+                           real_t                               radRefVertex,
+                           real_t                               rayVertex_0,
+                           real_t                               rayVertex_1,
+                           real_t                               refVertex_0,
+                           real_t                               refVertex_1,
+                           real_t                               thrVertex_0,
+                           real_t                               thrVertex_1 ) const;
+};
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b3d7b98a9d37fb1bdbda216fbb42c145902e92e9
--- /dev/null
+++ b/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp
@@ -0,0 +1,849 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P1ToP2ElementwiseGradientAnnulusMap_0_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P1ToP2ElementwiseGradientAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_kernel_op_0 = 0.66666666666666663;
+       const real_t tmp_kernel_op_1 = 2.6666666666666665;
+       const real_t tmp_kernel_op_2 = tmp_kernel_op_0 + tmp_kernel_op_1 - 3.0;
+       const real_t tmp_kernel_op_3 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_kernel_op_4 = -tmp_kernel_op_3;
+       const real_t tmp_kernel_op_18 = rayVertex_0 - refVertex_0;
+       const real_t tmp_kernel_op_19 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_kernel_op_20 = -tmp_kernel_op_19;
+       const real_t tmp_kernel_op_21 = rayVertex_1 - refVertex_1;
+       const real_t tmp_kernel_op_22 = radRayVertex - radRefVertex;
+       const real_t tmp_kernel_op_23 = -tmp_kernel_op_22*1.0 / (-tmp_kernel_op_18*tmp_kernel_op_20 + tmp_kernel_op_21*tmp_kernel_op_4);
+       const real_t tmp_kernel_op_24 = tmp_kernel_op_23*1.0;
+       const real_t tmp_kernel_op_28 = -rayVertex_1;
+       const real_t tmp_kernel_op_29 = -rayVertex_0;
+       const real_t tmp_kernel_op_47 = tmp_kernel_op_22*1.0 / (tmp_kernel_op_18*tmp_kernel_op_19 - tmp_kernel_op_21*tmp_kernel_op_3);
+       const real_t tmp_kernel_op_48 = tmp_kernel_op_47*1.0;
+       const real_t tmp_kernel_op_59 = 2.6666666666666665;
+       const real_t tmp_kernel_op_60 = 0.66666666666666663;
+       const real_t tmp_kernel_op_61 = tmp_kernel_op_59 + tmp_kernel_op_60 - 3.0;
+       const real_t tmp_kernel_op_95 = 0.66666666666666663;
+       const real_t tmp_kernel_op_96 = 0.66666666666666663;
+       const real_t tmp_kernel_op_97 = tmp_kernel_op_95 + tmp_kernel_op_96 - 3.0;
+       const real_t tmp_kernel_op_131 = 0.16666666666666674;
+       const real_t tmp_kernel_op_132 = 0.16666666666666671;
+       const real_t tmp_kernel_op_133 = 0.66666666666666674;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d tmp_kernel_op_5 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_6 = _mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_7 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_8 = _mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_9 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_6,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_1);
+                const __m256d tmp_kernel_op_10 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_11 = _mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_12 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_13 = _mm256_mul_pd(tmp_kernel_op_12,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_14 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_13,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_0);
+                const __m256d tmp_kernel_op_15 = _mm256_mul_pd(tmp_kernel_op_14,tmp_kernel_op_14);
+                const __m256d tmp_kernel_op_16 = _mm256_mul_pd(tmp_kernel_op_9,tmp_kernel_op_9);
+                const __m256d tmp_kernel_op_17 = _mm256_add_pd(tmp_kernel_op_15,tmp_kernel_op_16);
+                const __m256d tmp_kernel_op_25 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_17)),_mm256_set_pd(tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24));
+                const __m256d tmp_kernel_op_26 = _mm256_mul_pd(tmp_kernel_op_25,tmp_kernel_op_9);
+                const __m256d tmp_kernel_op_27 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_17),_mm256_mul_pd(tmp_kernel_op_17,tmp_kernel_op_17));
+                const __m256d tmp_kernel_op_30 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29),tmp_kernel_op_14),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28),tmp_kernel_op_9),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4))),_mm256_set_pd(tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_31 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_15,tmp_kernel_op_27),tmp_kernel_op_30),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_32 = _mm256_mul_pd(tmp_kernel_op_14,tmp_kernel_op_25);
+                const __m256d tmp_kernel_op_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_27,tmp_kernel_op_30),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_34 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_26,_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_14,tmp_kernel_op_33),tmp_kernel_op_9),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_35 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_31,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_32,_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(tmp_kernel_op_16,tmp_kernel_op_33))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_32,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_14,tmp_kernel_op_27),tmp_kernel_op_30),tmp_kernel_op_9),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_36 = _mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_35);
+                const __m256d tmp_kernel_op_37 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_34,tmp_kernel_op_35),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_38 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_36,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_37,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)));
+                const __m256d tmp_kernel_op_39 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_36,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_37,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                const __m256d tmp_kernel_op_40 = _mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_41 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_12,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_40);
+                const __m256d tmp_kernel_op_42 = _mm256_mul_pd(tmp_kernel_op_41,tmp_kernel_op_41);
+                const __m256d tmp_kernel_op_43 = _mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_44 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_43);
+                const __m256d tmp_kernel_op_45 = _mm256_mul_pd(tmp_kernel_op_44,tmp_kernel_op_44);
+                const __m256d tmp_kernel_op_46 = _mm256_add_pd(tmp_kernel_op_42,tmp_kernel_op_45);
+                const __m256d tmp_kernel_op_49 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_46)),_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48));
+                const __m256d tmp_kernel_op_50 = _mm256_mul_pd(tmp_kernel_op_41,tmp_kernel_op_49);
+                const __m256d tmp_kernel_op_51 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_44),_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_41),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)));
+                const __m256d tmp_kernel_op_52 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_46),_mm256_mul_pd(tmp_kernel_op_46,tmp_kernel_op_46)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_53 = _mm256_mul_pd(tmp_kernel_op_52,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_51,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_54 = _mm256_mul_pd(tmp_kernel_op_44,tmp_kernel_op_49);
+                const __m256d tmp_kernel_op_55 = _mm256_mul_pd(tmp_kernel_op_52,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_51,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_56 = _mm256_mul_pd(tmp_kernel_op_41,tmp_kernel_op_44);
+                const __m256d tmp_kernel_op_57 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_54,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(tmp_kernel_op_42,tmp_kernel_op_55)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_50,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_45,tmp_kernel_op_53),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_50,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_55,tmp_kernel_op_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_54,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(tmp_kernel_op_53,tmp_kernel_op_56))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_58 = _mm256_mul_pd(tmp_kernel_op_57,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_2,tmp_kernel_op_2,tmp_kernel_op_2,tmp_kernel_op_2)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_39,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_2,tmp_kernel_op_2,tmp_kernel_op_2,tmp_kernel_op_2))));
+                const __m256d tmp_kernel_op_62 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_6,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_1);
+                const __m256d tmp_kernel_op_63 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_13,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_0);
+                const __m256d tmp_kernel_op_64 = _mm256_mul_pd(tmp_kernel_op_63,tmp_kernel_op_63);
+                const __m256d tmp_kernel_op_65 = _mm256_mul_pd(tmp_kernel_op_62,tmp_kernel_op_62);
+                const __m256d tmp_kernel_op_66 = _mm256_add_pd(tmp_kernel_op_64,tmp_kernel_op_65);
+                const __m256d tmp_kernel_op_67 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_66)),_mm256_set_pd(tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24));
+                const __m256d tmp_kernel_op_68 = _mm256_mul_pd(tmp_kernel_op_62,tmp_kernel_op_67);
+                const __m256d tmp_kernel_op_69 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_66),_mm256_mul_pd(tmp_kernel_op_66,tmp_kernel_op_66));
+                const __m256d tmp_kernel_op_70 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29),tmp_kernel_op_63),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28),tmp_kernel_op_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4))),_mm256_set_pd(tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_71 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_68,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_64,tmp_kernel_op_69),tmp_kernel_op_70),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_72 = _mm256_mul_pd(tmp_kernel_op_63,tmp_kernel_op_67);
+                const __m256d tmp_kernel_op_73 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_69,tmp_kernel_op_70),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_74 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_68,_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_62,tmp_kernel_op_63),tmp_kernel_op_73),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_75 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_71,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_72,_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(tmp_kernel_op_65,tmp_kernel_op_73))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_72,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_62,tmp_kernel_op_63),tmp_kernel_op_69),tmp_kernel_op_70),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_76 = _mm256_mul_pd(tmp_kernel_op_71,tmp_kernel_op_75);
+                const __m256d tmp_kernel_op_77 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_74,tmp_kernel_op_75),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_78 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_76,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_77,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)));
+                const __m256d tmp_kernel_op_79 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_76,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_77,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                const __m256d tmp_kernel_op_80 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_12,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_40);
+                const __m256d tmp_kernel_op_81 = _mm256_mul_pd(tmp_kernel_op_80,tmp_kernel_op_80);
+                const __m256d tmp_kernel_op_82 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_43);
+                const __m256d tmp_kernel_op_83 = _mm256_mul_pd(tmp_kernel_op_82,tmp_kernel_op_82);
+                const __m256d tmp_kernel_op_84 = _mm256_add_pd(tmp_kernel_op_81,tmp_kernel_op_83);
+                const __m256d tmp_kernel_op_85 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_84)),_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48));
+                const __m256d tmp_kernel_op_86 = _mm256_mul_pd(tmp_kernel_op_80,tmp_kernel_op_85);
+                const __m256d tmp_kernel_op_87 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_82),_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_80),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)));
+                const __m256d tmp_kernel_op_88 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_84),_mm256_mul_pd(tmp_kernel_op_84,tmp_kernel_op_84)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_89 = _mm256_mul_pd(tmp_kernel_op_88,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_87,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_90 = _mm256_mul_pd(tmp_kernel_op_82,tmp_kernel_op_85);
+                const __m256d tmp_kernel_op_91 = _mm256_mul_pd(tmp_kernel_op_88,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_87,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_92 = _mm256_mul_pd(tmp_kernel_op_80,tmp_kernel_op_82);
+                const __m256d tmp_kernel_op_93 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_90,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(tmp_kernel_op_81,tmp_kernel_op_91)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_86,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_83,tmp_kernel_op_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_86,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_91,tmp_kernel_op_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_90,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(tmp_kernel_op_89,tmp_kernel_op_92))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_94 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_78,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_61,tmp_kernel_op_61,tmp_kernel_op_61,tmp_kernel_op_61)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_79,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_61,tmp_kernel_op_61,tmp_kernel_op_61,tmp_kernel_op_61))));
+                const __m256d tmp_kernel_op_98 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_6,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),p_affine_0_1);
+                const __m256d tmp_kernel_op_99 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_13,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),p_affine_0_0);
+                const __m256d tmp_kernel_op_100 = _mm256_mul_pd(tmp_kernel_op_99,tmp_kernel_op_99);
+                const __m256d tmp_kernel_op_101 = _mm256_mul_pd(tmp_kernel_op_98,tmp_kernel_op_98);
+                const __m256d tmp_kernel_op_102 = _mm256_add_pd(tmp_kernel_op_100,tmp_kernel_op_101);
+                const __m256d tmp_kernel_op_103 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_102)),_mm256_set_pd(tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24));
+                const __m256d tmp_kernel_op_104 = _mm256_mul_pd(tmp_kernel_op_103,tmp_kernel_op_98);
+                const __m256d tmp_kernel_op_105 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_102),_mm256_mul_pd(tmp_kernel_op_102,tmp_kernel_op_102));
+                const __m256d tmp_kernel_op_106 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29),tmp_kernel_op_99),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28),tmp_kernel_op_98),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4))),_mm256_set_pd(tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_107 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_104,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_105),tmp_kernel_op_106),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_108 = _mm256_mul_pd(tmp_kernel_op_103,tmp_kernel_op_99);
+                const __m256d tmp_kernel_op_109 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_106),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_110 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_104,_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_109,tmp_kernel_op_98),tmp_kernel_op_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_111 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_108,_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(tmp_kernel_op_101,tmp_kernel_op_109))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_108,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_106),tmp_kernel_op_98),tmp_kernel_op_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_112 = _mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_111);
+                const __m256d tmp_kernel_op_113 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_110,tmp_kernel_op_111),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_114 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_112,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_113,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)));
+                const __m256d tmp_kernel_op_115 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_112,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_113,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                const __m256d tmp_kernel_op_116 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_12,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),tmp_kernel_op_40);
+                const __m256d tmp_kernel_op_117 = _mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_116);
+                const __m256d tmp_kernel_op_118 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),tmp_kernel_op_43);
+                const __m256d tmp_kernel_op_119 = _mm256_mul_pd(tmp_kernel_op_118,tmp_kernel_op_118);
+                const __m256d tmp_kernel_op_120 = _mm256_add_pd(tmp_kernel_op_117,tmp_kernel_op_119);
+                const __m256d tmp_kernel_op_121 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_120)),_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48));
+                const __m256d tmp_kernel_op_122 = _mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_121);
+                const __m256d tmp_kernel_op_123 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_118),_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_116),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)));
+                const __m256d tmp_kernel_op_124 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_120),_mm256_mul_pd(tmp_kernel_op_120,tmp_kernel_op_120)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_125 = _mm256_mul_pd(tmp_kernel_op_124,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_123,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_126 = _mm256_mul_pd(tmp_kernel_op_118,tmp_kernel_op_121);
+                const __m256d tmp_kernel_op_127 = _mm256_mul_pd(tmp_kernel_op_124,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_123,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_128 = _mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_118);
+                const __m256d tmp_kernel_op_129 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_122,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_127,tmp_kernel_op_128),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_126,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(tmp_kernel_op_125,tmp_kernel_op_128))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_126,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(tmp_kernel_op_117,tmp_kernel_op_127)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_122,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_119,tmp_kernel_op_125),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_130 = _mm256_mul_pd(tmp_kernel_op_129,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_114,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_97,tmp_kernel_op_97,tmp_kernel_op_97,tmp_kernel_op_97)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_115,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_97,tmp_kernel_op_97,tmp_kernel_op_97,tmp_kernel_op_97))));
+                const __m256d tmp_kernel_op_134 = _mm256_mul_pd(tmp_kernel_op_38,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_135 = _mm256_mul_pd(tmp_kernel_op_134,tmp_kernel_op_57);
+                const __m256d tmp_kernel_op_136 = _mm256_mul_pd(tmp_kernel_op_78,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59)));
+                const __m256d tmp_kernel_op_137 = _mm256_mul_pd(tmp_kernel_op_136,tmp_kernel_op_93);
+                const __m256d tmp_kernel_op_138 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95)));
+                const __m256d tmp_kernel_op_139 = _mm256_mul_pd(tmp_kernel_op_129,tmp_kernel_op_138);
+                const __m256d tmp_kernel_op_140 = _mm256_mul_pd(tmp_kernel_op_57,_mm256_set_pd(tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131));
+                const __m256d tmp_kernel_op_141 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132));
+                const __m256d tmp_kernel_op_142 = _mm256_mul_pd(tmp_kernel_op_129,_mm256_set_pd(tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133));
+                const __m256d tmp_kernel_op_143 = _mm256_mul_pd(tmp_kernel_op_39,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)));
+                const __m256d tmp_kernel_op_144 = _mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_57);
+                const __m256d tmp_kernel_op_145 = _mm256_mul_pd(tmp_kernel_op_79,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60)));
+                const __m256d tmp_kernel_op_146 = _mm256_mul_pd(tmp_kernel_op_145,tmp_kernel_op_93);
+                const __m256d tmp_kernel_op_147 = _mm256_mul_pd(tmp_kernel_op_115,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96)));
+                const __m256d tmp_kernel_op_148 = _mm256_mul_pd(tmp_kernel_op_129,tmp_kernel_op_147);
+                const __m256d tmp_kernel_op_149 = _mm256_mul_pd(tmp_kernel_op_39,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0));
+                const __m256d tmp_kernel_op_150 = _mm256_mul_pd(tmp_kernel_op_38,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1));
+                const __m256d tmp_kernel_op_151 = _mm256_mul_pd(tmp_kernel_op_57,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_149,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_150,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_152 = _mm256_mul_pd(tmp_kernel_op_79,_mm256_set_pd(tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59));
+                const __m256d tmp_kernel_op_153 = _mm256_mul_pd(tmp_kernel_op_78,_mm256_set_pd(tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60));
+                const __m256d tmp_kernel_op_154 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_152,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_153,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_155 = _mm256_mul_pd(tmp_kernel_op_115,_mm256_set_pd(tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95));
+                const __m256d tmp_kernel_op_156 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_set_pd(tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96));
+                const __m256d tmp_kernel_op_157 = _mm256_mul_pd(tmp_kernel_op_129,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_155,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_156,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_158 = _mm256_mul_pd(tmp_kernel_op_57,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_39,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_set_pd(-1.333333333333333,-1.333333333333333,-1.333333333333333,-1.333333333333333))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_150));
+                const __m256d tmp_kernel_op_159 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_79,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_153));
+                const __m256d tmp_kernel_op_160 = _mm256_mul_pd(tmp_kernel_op_129,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_115,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_156));
+                const __m256d tmp_kernel_op_161 = _mm256_mul_pd(tmp_kernel_op_57,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_38,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_149));
+                const __m256d tmp_kernel_op_162 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_78,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60)),_mm256_set_pd(-1.333333333333333,-1.333333333333333,-1.333333333333333,-1.333333333333333))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_152));
+                const __m256d tmp_kernel_op_163 = _mm256_mul_pd(tmp_kernel_op_129,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_155));
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_58,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_94,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_94,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_58,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_58,_mm256_set_pd(tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131)),_mm256_mul_pd(tmp_kernel_op_94,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132))),_mm256_mul_pd(tmp_kernel_op_130,_mm256_set_pd(tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133)))));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_135,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_139,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_137,_mm256_set_pd(-0.66666666666666663,-0.66666666666666663,-0.66666666666666663,-0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_137,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_139,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_135,_mm256_set_pd(-0.66666666666666663,-0.66666666666666663,-0.66666666666666663,-0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_134,tmp_kernel_op_140),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_136,tmp_kernel_op_141),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_138,tmp_kernel_op_142),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_144,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_148,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_146,_mm256_set_pd(-0.66666666666666663,-0.66666666666666663,-0.66666666666666663,-0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_146,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_148,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_144,_mm256_set_pd(-0.66666666666666663,-0.66666666666666663,-0.66666666666666663,-0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_140,tmp_kernel_op_143),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_145),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_142,tmp_kernel_op_147),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_151,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_157,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_154,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_154,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_157,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_151,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_151,_mm256_set_pd(tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131)),_mm256_mul_pd(tmp_kernel_op_154,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132))),_mm256_mul_pd(tmp_kernel_op_157,_mm256_set_pd(tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133)))));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_158,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_160,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_159,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_159,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_160,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_158,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_158,_mm256_set_pd(tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131)),_mm256_mul_pd(tmp_kernel_op_159,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132))),_mm256_mul_pd(tmp_kernel_op_160,_mm256_set_pd(tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133)))));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_161,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_163,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_162,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_162,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_163,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_161,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_161,_mm256_set_pd(tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131)),_mm256_mul_pd(tmp_kernel_op_162,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132))),_mm256_mul_pd(tmp_kernel_op_163,_mm256_set_pd(tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133)))));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t src_dof_1 = _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_2 = _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t tmp_kernel_op_5 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_kernel_op_6 = -tmp_kernel_op_5;
+                const real_t tmp_kernel_op_7 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_kernel_op_8 = -tmp_kernel_op_7;
+                const real_t tmp_kernel_op_9 = p_affine_0_1 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+                const real_t tmp_kernel_op_10 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_kernel_op_11 = -tmp_kernel_op_10;
+                const real_t tmp_kernel_op_12 = p_affine_0_0 - p_affine_2_0;
+                const real_t tmp_kernel_op_13 = -tmp_kernel_op_12;
+                const real_t tmp_kernel_op_14 = p_affine_0_0 + tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.66666666666666663;
+                const real_t tmp_kernel_op_15 = (tmp_kernel_op_14*tmp_kernel_op_14);
+                const real_t tmp_kernel_op_16 = (tmp_kernel_op_9*tmp_kernel_op_9);
+                const real_t tmp_kernel_op_17 = tmp_kernel_op_15 + tmp_kernel_op_16;
+                const real_t tmp_kernel_op_25 = pow(tmp_kernel_op_17, -0.50000000000000000)*tmp_kernel_op_24;
+                const real_t tmp_kernel_op_26 = tmp_kernel_op_25*tmp_kernel_op_9;
+                const real_t tmp_kernel_op_27 = pow(tmp_kernel_op_17, -1.5000000000000000);
+                const real_t tmp_kernel_op_30 = radRayVertex + tmp_kernel_op_23*(tmp_kernel_op_20*(tmp_kernel_op_14 + tmp_kernel_op_29) - tmp_kernel_op_4*(tmp_kernel_op_28 + tmp_kernel_op_9));
+                const real_t tmp_kernel_op_31 = tmp_kernel_op_15*tmp_kernel_op_27*tmp_kernel_op_30*1.0 - tmp_kernel_op_26*tmp_kernel_op_4;
+                const real_t tmp_kernel_op_32 = tmp_kernel_op_14*tmp_kernel_op_25;
+                const real_t tmp_kernel_op_33 = tmp_kernel_op_27*tmp_kernel_op_30*1.0;
+                const real_t tmp_kernel_op_34 = -tmp_kernel_op_14*tmp_kernel_op_33*tmp_kernel_op_9 + tmp_kernel_op_20*tmp_kernel_op_26;
+                const real_t tmp_kernel_op_35 = 1.0 / (tmp_kernel_op_31*(tmp_kernel_op_16*tmp_kernel_op_33 + tmp_kernel_op_20*tmp_kernel_op_32) - tmp_kernel_op_34*(-tmp_kernel_op_14*tmp_kernel_op_27*tmp_kernel_op_30*tmp_kernel_op_9 - tmp_kernel_op_32*tmp_kernel_op_4));
+                const real_t tmp_kernel_op_36 = tmp_kernel_op_31*tmp_kernel_op_35;
+                const real_t tmp_kernel_op_37 = -tmp_kernel_op_34*tmp_kernel_op_35;
+                const real_t tmp_kernel_op_38 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_36 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_37;
+                const real_t tmp_kernel_op_39 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_36 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_37;
+                const real_t tmp_kernel_op_40 = -p_affine_0_0;
+                const real_t tmp_kernel_op_41 = tmp_kernel_op_10*0.16666666666666666 + tmp_kernel_op_12*0.66666666666666663 + tmp_kernel_op_40;
+                const real_t tmp_kernel_op_42 = (tmp_kernel_op_41*tmp_kernel_op_41);
+                const real_t tmp_kernel_op_43 = -p_affine_0_1;
+                const real_t tmp_kernel_op_44 = tmp_kernel_op_43 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.66666666666666663;
+                const real_t tmp_kernel_op_45 = (tmp_kernel_op_44*tmp_kernel_op_44);
+                const real_t tmp_kernel_op_46 = tmp_kernel_op_42 + tmp_kernel_op_45;
+                const real_t tmp_kernel_op_49 = pow(tmp_kernel_op_46, -0.50000000000000000)*tmp_kernel_op_48;
+                const real_t tmp_kernel_op_50 = tmp_kernel_op_41*tmp_kernel_op_49;
+                const real_t tmp_kernel_op_51 = -tmp_kernel_op_19*(rayVertex_0 + tmp_kernel_op_41) + tmp_kernel_op_3*(rayVertex_1 + tmp_kernel_op_44);
+                const real_t tmp_kernel_op_52 = pow(tmp_kernel_op_46, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_53 = tmp_kernel_op_52*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_51);
+                const real_t tmp_kernel_op_54 = tmp_kernel_op_44*tmp_kernel_op_49;
+                const real_t tmp_kernel_op_55 = tmp_kernel_op_52*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_51);
+                const real_t tmp_kernel_op_56 = tmp_kernel_op_41*tmp_kernel_op_44;
+                const real_t tmp_kernel_op_57 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_50 - tmp_kernel_op_45*tmp_kernel_op_53)*(tmp_kernel_op_3*tmp_kernel_op_54 + tmp_kernel_op_42*tmp_kernel_op_55) - (tmp_kernel_op_19*tmp_kernel_op_54 + tmp_kernel_op_53*tmp_kernel_op_56)*(tmp_kernel_op_3*tmp_kernel_op_50 - tmp_kernel_op_55*tmp_kernel_op_56));
+                const real_t tmp_kernel_op_58 = tmp_kernel_op_57*(-tmp_kernel_op_2*tmp_kernel_op_38 - tmp_kernel_op_2*tmp_kernel_op_39);
+                const real_t tmp_kernel_op_62 = p_affine_0_1 + tmp_kernel_op_6*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+                const real_t tmp_kernel_op_63 = p_affine_0_0 + tmp_kernel_op_11*0.66666666666666663 + tmp_kernel_op_13*0.16666666666666666;
+                const real_t tmp_kernel_op_64 = (tmp_kernel_op_63*tmp_kernel_op_63);
+                const real_t tmp_kernel_op_65 = (tmp_kernel_op_62*tmp_kernel_op_62);
+                const real_t tmp_kernel_op_66 = tmp_kernel_op_64 + tmp_kernel_op_65;
+                const real_t tmp_kernel_op_67 = tmp_kernel_op_24*pow(tmp_kernel_op_66, -0.50000000000000000);
+                const real_t tmp_kernel_op_68 = tmp_kernel_op_62*tmp_kernel_op_67;
+                const real_t tmp_kernel_op_69 = pow(tmp_kernel_op_66, -1.5000000000000000);
+                const real_t tmp_kernel_op_70 = radRayVertex + tmp_kernel_op_23*(tmp_kernel_op_20*(tmp_kernel_op_29 + tmp_kernel_op_63) - tmp_kernel_op_4*(tmp_kernel_op_28 + tmp_kernel_op_62));
+                const real_t tmp_kernel_op_71 = -tmp_kernel_op_4*tmp_kernel_op_68 + tmp_kernel_op_64*tmp_kernel_op_69*tmp_kernel_op_70*1.0;
+                const real_t tmp_kernel_op_72 = tmp_kernel_op_63*tmp_kernel_op_67;
+                const real_t tmp_kernel_op_73 = tmp_kernel_op_69*tmp_kernel_op_70*1.0;
+                const real_t tmp_kernel_op_74 = tmp_kernel_op_20*tmp_kernel_op_68 - tmp_kernel_op_62*tmp_kernel_op_63*tmp_kernel_op_73;
+                const real_t tmp_kernel_op_75 = 1.0 / (tmp_kernel_op_71*(tmp_kernel_op_20*tmp_kernel_op_72 + tmp_kernel_op_65*tmp_kernel_op_73) - tmp_kernel_op_74*(-tmp_kernel_op_4*tmp_kernel_op_72 - tmp_kernel_op_62*tmp_kernel_op_63*tmp_kernel_op_69*tmp_kernel_op_70));
+                const real_t tmp_kernel_op_76 = tmp_kernel_op_71*tmp_kernel_op_75;
+                const real_t tmp_kernel_op_77 = -tmp_kernel_op_74*tmp_kernel_op_75;
+                const real_t tmp_kernel_op_78 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_76 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_77;
+                const real_t tmp_kernel_op_79 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_76 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_77;
+                const real_t tmp_kernel_op_80 = tmp_kernel_op_10*0.66666666666666663 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_40;
+                const real_t tmp_kernel_op_81 = (tmp_kernel_op_80*tmp_kernel_op_80);
+                const real_t tmp_kernel_op_82 = tmp_kernel_op_43 + tmp_kernel_op_5*0.66666666666666663 + tmp_kernel_op_7*0.16666666666666666;
+                const real_t tmp_kernel_op_83 = (tmp_kernel_op_82*tmp_kernel_op_82);
+                const real_t tmp_kernel_op_84 = tmp_kernel_op_81 + tmp_kernel_op_83;
+                const real_t tmp_kernel_op_85 = tmp_kernel_op_48*pow(tmp_kernel_op_84, -0.50000000000000000);
+                const real_t tmp_kernel_op_86 = tmp_kernel_op_80*tmp_kernel_op_85;
+                const real_t tmp_kernel_op_87 = -tmp_kernel_op_19*(rayVertex_0 + tmp_kernel_op_80) + tmp_kernel_op_3*(rayVertex_1 + tmp_kernel_op_82);
+                const real_t tmp_kernel_op_88 = pow(tmp_kernel_op_84, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_89 = tmp_kernel_op_88*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_87);
+                const real_t tmp_kernel_op_90 = tmp_kernel_op_82*tmp_kernel_op_85;
+                const real_t tmp_kernel_op_91 = tmp_kernel_op_88*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_87);
+                const real_t tmp_kernel_op_92 = tmp_kernel_op_80*tmp_kernel_op_82;
+                const real_t tmp_kernel_op_93 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_86 - tmp_kernel_op_83*tmp_kernel_op_89)*(tmp_kernel_op_3*tmp_kernel_op_90 + tmp_kernel_op_81*tmp_kernel_op_91) - (tmp_kernel_op_19*tmp_kernel_op_90 + tmp_kernel_op_89*tmp_kernel_op_92)*(tmp_kernel_op_3*tmp_kernel_op_86 - tmp_kernel_op_91*tmp_kernel_op_92));
+                const real_t tmp_kernel_op_94 = tmp_kernel_op_93*(-tmp_kernel_op_61*tmp_kernel_op_78 - tmp_kernel_op_61*tmp_kernel_op_79);
+                const real_t tmp_kernel_op_98 = p_affine_0_1 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+                const real_t tmp_kernel_op_99 = p_affine_0_0 + tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.16666666666666666;
+                const real_t tmp_kernel_op_100 = (tmp_kernel_op_99*tmp_kernel_op_99);
+                const real_t tmp_kernel_op_101 = (tmp_kernel_op_98*tmp_kernel_op_98);
+                const real_t tmp_kernel_op_102 = tmp_kernel_op_100 + tmp_kernel_op_101;
+                const real_t tmp_kernel_op_103 = pow(tmp_kernel_op_102, -0.50000000000000000)*tmp_kernel_op_24;
+                const real_t tmp_kernel_op_104 = tmp_kernel_op_103*tmp_kernel_op_98;
+                const real_t tmp_kernel_op_105 = pow(tmp_kernel_op_102, -1.5000000000000000);
+                const real_t tmp_kernel_op_106 = radRayVertex + tmp_kernel_op_23*(tmp_kernel_op_20*(tmp_kernel_op_29 + tmp_kernel_op_99) - tmp_kernel_op_4*(tmp_kernel_op_28 + tmp_kernel_op_98));
+                const real_t tmp_kernel_op_107 = tmp_kernel_op_100*tmp_kernel_op_105*tmp_kernel_op_106*1.0 - tmp_kernel_op_104*tmp_kernel_op_4;
+                const real_t tmp_kernel_op_108 = tmp_kernel_op_103*tmp_kernel_op_99;
+                const real_t tmp_kernel_op_109 = tmp_kernel_op_105*tmp_kernel_op_106*1.0;
+                const real_t tmp_kernel_op_110 = tmp_kernel_op_104*tmp_kernel_op_20 - tmp_kernel_op_109*tmp_kernel_op_98*tmp_kernel_op_99;
+                const real_t tmp_kernel_op_111 = 1.0 / (tmp_kernel_op_107*(tmp_kernel_op_101*tmp_kernel_op_109 + tmp_kernel_op_108*tmp_kernel_op_20) - tmp_kernel_op_110*(-tmp_kernel_op_105*tmp_kernel_op_106*tmp_kernel_op_98*tmp_kernel_op_99 - tmp_kernel_op_108*tmp_kernel_op_4));
+                const real_t tmp_kernel_op_112 = tmp_kernel_op_107*tmp_kernel_op_111;
+                const real_t tmp_kernel_op_113 = -tmp_kernel_op_110*tmp_kernel_op_111;
+                const real_t tmp_kernel_op_114 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_112 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_113;
+                const real_t tmp_kernel_op_115 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_112 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_113;
+                const real_t tmp_kernel_op_116 = tmp_kernel_op_10*0.16666666666666666 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_40;
+                const real_t tmp_kernel_op_117 = (tmp_kernel_op_116*tmp_kernel_op_116);
+                const real_t tmp_kernel_op_118 = tmp_kernel_op_43 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.16666666666666666;
+                const real_t tmp_kernel_op_119 = (tmp_kernel_op_118*tmp_kernel_op_118);
+                const real_t tmp_kernel_op_120 = tmp_kernel_op_117 + tmp_kernel_op_119;
+                const real_t tmp_kernel_op_121 = pow(tmp_kernel_op_120, -0.50000000000000000)*tmp_kernel_op_48;
+                const real_t tmp_kernel_op_122 = tmp_kernel_op_116*tmp_kernel_op_121;
+                const real_t tmp_kernel_op_123 = -tmp_kernel_op_19*(rayVertex_0 + tmp_kernel_op_116) + tmp_kernel_op_3*(rayVertex_1 + tmp_kernel_op_118);
+                const real_t tmp_kernel_op_124 = pow(tmp_kernel_op_120, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_125 = tmp_kernel_op_124*(radRayVertex + tmp_kernel_op_123*tmp_kernel_op_47);
+                const real_t tmp_kernel_op_126 = tmp_kernel_op_118*tmp_kernel_op_121;
+                const real_t tmp_kernel_op_127 = tmp_kernel_op_124*(radRayVertex + tmp_kernel_op_123*tmp_kernel_op_47);
+                const real_t tmp_kernel_op_128 = tmp_kernel_op_116*tmp_kernel_op_118;
+                const real_t tmp_kernel_op_129 = abs_det_jac_affine_GRAY*0.16666666666666666*abs(-(tmp_kernel_op_117*tmp_kernel_op_127 + tmp_kernel_op_126*tmp_kernel_op_3)*(-tmp_kernel_op_119*tmp_kernel_op_125 + tmp_kernel_op_122*tmp_kernel_op_19) + (tmp_kernel_op_122*tmp_kernel_op_3 - tmp_kernel_op_127*tmp_kernel_op_128)*(tmp_kernel_op_125*tmp_kernel_op_128 + tmp_kernel_op_126*tmp_kernel_op_19));
+                const real_t tmp_kernel_op_130 = tmp_kernel_op_129*(-tmp_kernel_op_114*tmp_kernel_op_97 - tmp_kernel_op_115*tmp_kernel_op_97);
+                const real_t tmp_kernel_op_134 = tmp_kernel_op_38*(tmp_kernel_op_0 - 1.0);
+                const real_t tmp_kernel_op_135 = tmp_kernel_op_134*tmp_kernel_op_57;
+                const real_t tmp_kernel_op_136 = tmp_kernel_op_78*(tmp_kernel_op_59 - 1.0);
+                const real_t tmp_kernel_op_137 = tmp_kernel_op_136*tmp_kernel_op_93;
+                const real_t tmp_kernel_op_138 = tmp_kernel_op_114*(tmp_kernel_op_95 - 1.0);
+                const real_t tmp_kernel_op_139 = tmp_kernel_op_129*tmp_kernel_op_138;
+                const real_t tmp_kernel_op_140 = tmp_kernel_op_131*tmp_kernel_op_57;
+                const real_t tmp_kernel_op_141 = tmp_kernel_op_132*tmp_kernel_op_93;
+                const real_t tmp_kernel_op_142 = tmp_kernel_op_129*tmp_kernel_op_133;
+                const real_t tmp_kernel_op_143 = tmp_kernel_op_39*(tmp_kernel_op_1 - 1.0);
+                const real_t tmp_kernel_op_144 = tmp_kernel_op_143*tmp_kernel_op_57;
+                const real_t tmp_kernel_op_145 = tmp_kernel_op_79*(tmp_kernel_op_60 - 1.0);
+                const real_t tmp_kernel_op_146 = tmp_kernel_op_145*tmp_kernel_op_93;
+                const real_t tmp_kernel_op_147 = tmp_kernel_op_115*(tmp_kernel_op_96 - 1.0);
+                const real_t tmp_kernel_op_148 = tmp_kernel_op_129*tmp_kernel_op_147;
+                const real_t tmp_kernel_op_149 = tmp_kernel_op_0*tmp_kernel_op_39;
+                const real_t tmp_kernel_op_150 = tmp_kernel_op_1*tmp_kernel_op_38;
+                const real_t tmp_kernel_op_151 = tmp_kernel_op_57*(-tmp_kernel_op_149 - tmp_kernel_op_150);
+                const real_t tmp_kernel_op_152 = tmp_kernel_op_59*tmp_kernel_op_79;
+                const real_t tmp_kernel_op_153 = tmp_kernel_op_60*tmp_kernel_op_78;
+                const real_t tmp_kernel_op_154 = tmp_kernel_op_93*(-tmp_kernel_op_152 - tmp_kernel_op_153);
+                const real_t tmp_kernel_op_155 = tmp_kernel_op_115*tmp_kernel_op_95;
+                const real_t tmp_kernel_op_156 = tmp_kernel_op_114*tmp_kernel_op_96;
+                const real_t tmp_kernel_op_157 = tmp_kernel_op_129*(-tmp_kernel_op_155 - tmp_kernel_op_156);
+                const real_t tmp_kernel_op_158 = tmp_kernel_op_57*(tmp_kernel_op_150 - tmp_kernel_op_39*(-tmp_kernel_op_0 - 1.333333333333333));
+                const real_t tmp_kernel_op_159 = tmp_kernel_op_93*(tmp_kernel_op_153 - tmp_kernel_op_79*(-tmp_kernel_op_59 + 2.666666666666667));
+                const real_t tmp_kernel_op_160 = tmp_kernel_op_129*(-tmp_kernel_op_115*(-tmp_kernel_op_95 + 2.666666666666667) + tmp_kernel_op_156);
+                const real_t tmp_kernel_op_161 = tmp_kernel_op_57*(tmp_kernel_op_149 - tmp_kernel_op_38*(-tmp_kernel_op_1 + 2.666666666666667));
+                const real_t tmp_kernel_op_162 = tmp_kernel_op_93*(tmp_kernel_op_152 - tmp_kernel_op_78*(-tmp_kernel_op_60 - 1.333333333333333));
+                const real_t tmp_kernel_op_163 = tmp_kernel_op_129*(-tmp_kernel_op_114*(-tmp_kernel_op_96 + 2.666666666666667) + tmp_kernel_op_155);
+                const real_t elMatVec_0 = src_dof_0*(tmp_kernel_op_130*tmp_kernel_op_133 + tmp_kernel_op_131*tmp_kernel_op_58 + tmp_kernel_op_132*tmp_kernel_op_94) + src_dof_1*(tmp_kernel_op_130*0.16666666666666666 + tmp_kernel_op_58*0.16666666666666666 + tmp_kernel_op_94*0.66666666666666663) + src_dof_2*(tmp_kernel_op_130*0.16666666666666666 + tmp_kernel_op_58*0.66666666666666663 + tmp_kernel_op_94*0.16666666666666666);
+                const real_t elMatVec_1 = src_dof_0*(-tmp_kernel_op_134*tmp_kernel_op_140 - tmp_kernel_op_136*tmp_kernel_op_141 - tmp_kernel_op_138*tmp_kernel_op_142) + src_dof_1*(tmp_kernel_op_135*-0.16666666666666666 + tmp_kernel_op_137*-0.66666666666666663 + tmp_kernel_op_139*-0.16666666666666666) + src_dof_2*(tmp_kernel_op_135*-0.66666666666666663 + tmp_kernel_op_137*-0.16666666666666666 + tmp_kernel_op_139*-0.16666666666666666);
+                const real_t elMatVec_2 = src_dof_0*(-tmp_kernel_op_140*tmp_kernel_op_143 - tmp_kernel_op_141*tmp_kernel_op_145 - tmp_kernel_op_142*tmp_kernel_op_147) + src_dof_1*(tmp_kernel_op_144*-0.16666666666666666 + tmp_kernel_op_146*-0.66666666666666663 + tmp_kernel_op_148*-0.16666666666666666) + src_dof_2*(tmp_kernel_op_144*-0.66666666666666663 + tmp_kernel_op_146*-0.16666666666666666 + tmp_kernel_op_148*-0.16666666666666666);
+                const real_t elMatVec_3 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_151 + tmp_kernel_op_132*tmp_kernel_op_154 + tmp_kernel_op_133*tmp_kernel_op_157) + src_dof_1*(tmp_kernel_op_151*0.16666666666666666 + tmp_kernel_op_154*0.66666666666666663 + tmp_kernel_op_157*0.16666666666666666) + src_dof_2*(tmp_kernel_op_151*0.66666666666666663 + tmp_kernel_op_154*0.16666666666666666 + tmp_kernel_op_157*0.16666666666666666);
+                const real_t elMatVec_4 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_158 + tmp_kernel_op_132*tmp_kernel_op_159 + tmp_kernel_op_133*tmp_kernel_op_160) + src_dof_1*(tmp_kernel_op_158*0.16666666666666666 + tmp_kernel_op_159*0.66666666666666663 + tmp_kernel_op_160*0.16666666666666666) + src_dof_2*(tmp_kernel_op_158*0.66666666666666663 + tmp_kernel_op_159*0.16666666666666666 + tmp_kernel_op_160*0.16666666666666666);
+                const real_t elMatVec_5 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_161 + tmp_kernel_op_132*tmp_kernel_op_162 + tmp_kernel_op_133*tmp_kernel_op_163) + src_dof_1*(tmp_kernel_op_161*0.16666666666666666 + tmp_kernel_op_162*0.66666666666666663 + tmp_kernel_op_163*0.16666666666666666) + src_dof_2*(tmp_kernel_op_161*0.66666666666666663 + tmp_kernel_op_162*0.16666666666666666 + tmp_kernel_op_163*0.16666666666666666);
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d tmp_kernel_op_5 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_6 = _mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_7 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_8 = _mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_9 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_6,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_1);
+                const __m256d tmp_kernel_op_10 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_11 = _mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_12 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_13 = _mm256_mul_pd(tmp_kernel_op_12,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_14 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_13,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_0);
+                const __m256d tmp_kernel_op_15 = _mm256_mul_pd(tmp_kernel_op_14,tmp_kernel_op_14);
+                const __m256d tmp_kernel_op_16 = _mm256_mul_pd(tmp_kernel_op_9,tmp_kernel_op_9);
+                const __m256d tmp_kernel_op_17 = _mm256_add_pd(tmp_kernel_op_15,tmp_kernel_op_16);
+                const __m256d tmp_kernel_op_25 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_17)),_mm256_set_pd(tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24));
+                const __m256d tmp_kernel_op_26 = _mm256_mul_pd(tmp_kernel_op_25,tmp_kernel_op_9);
+                const __m256d tmp_kernel_op_27 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_17),_mm256_mul_pd(tmp_kernel_op_17,tmp_kernel_op_17));
+                const __m256d tmp_kernel_op_30 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29),tmp_kernel_op_14),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28),tmp_kernel_op_9),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4))),_mm256_set_pd(tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_31 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_15,tmp_kernel_op_27),tmp_kernel_op_30),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_32 = _mm256_mul_pd(tmp_kernel_op_14,tmp_kernel_op_25);
+                const __m256d tmp_kernel_op_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_27,tmp_kernel_op_30),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_34 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_26,_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_14,tmp_kernel_op_33),tmp_kernel_op_9),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_35 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_31,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_32,_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(tmp_kernel_op_16,tmp_kernel_op_33))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_32,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_14,tmp_kernel_op_27),tmp_kernel_op_30),tmp_kernel_op_9),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_36 = _mm256_mul_pd(tmp_kernel_op_31,tmp_kernel_op_35);
+                const __m256d tmp_kernel_op_37 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_34,tmp_kernel_op_35),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_38 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_36,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_37,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)));
+                const __m256d tmp_kernel_op_39 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_36,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_37,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                const __m256d tmp_kernel_op_40 = _mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_41 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_12,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_40);
+                const __m256d tmp_kernel_op_42 = _mm256_mul_pd(tmp_kernel_op_41,tmp_kernel_op_41);
+                const __m256d tmp_kernel_op_43 = _mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_44 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_43);
+                const __m256d tmp_kernel_op_45 = _mm256_mul_pd(tmp_kernel_op_44,tmp_kernel_op_44);
+                const __m256d tmp_kernel_op_46 = _mm256_add_pd(tmp_kernel_op_42,tmp_kernel_op_45);
+                const __m256d tmp_kernel_op_49 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_46)),_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48));
+                const __m256d tmp_kernel_op_50 = _mm256_mul_pd(tmp_kernel_op_41,tmp_kernel_op_49);
+                const __m256d tmp_kernel_op_51 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_44),_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_41),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)));
+                const __m256d tmp_kernel_op_52 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_46),_mm256_mul_pd(tmp_kernel_op_46,tmp_kernel_op_46)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_53 = _mm256_mul_pd(tmp_kernel_op_52,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_51,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_54 = _mm256_mul_pd(tmp_kernel_op_44,tmp_kernel_op_49);
+                const __m256d tmp_kernel_op_55 = _mm256_mul_pd(tmp_kernel_op_52,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_51,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_56 = _mm256_mul_pd(tmp_kernel_op_41,tmp_kernel_op_44);
+                const __m256d tmp_kernel_op_57 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_54,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(tmp_kernel_op_42,tmp_kernel_op_55)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_50,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_45,tmp_kernel_op_53),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_50,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_55,tmp_kernel_op_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_54,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(tmp_kernel_op_53,tmp_kernel_op_56))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_58 = _mm256_mul_pd(tmp_kernel_op_57,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_2,tmp_kernel_op_2,tmp_kernel_op_2,tmp_kernel_op_2)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_39,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_2,tmp_kernel_op_2,tmp_kernel_op_2,tmp_kernel_op_2))));
+                const __m256d tmp_kernel_op_62 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_6,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_1);
+                const __m256d tmp_kernel_op_63 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_13,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_0);
+                const __m256d tmp_kernel_op_64 = _mm256_mul_pd(tmp_kernel_op_63,tmp_kernel_op_63);
+                const __m256d tmp_kernel_op_65 = _mm256_mul_pd(tmp_kernel_op_62,tmp_kernel_op_62);
+                const __m256d tmp_kernel_op_66 = _mm256_add_pd(tmp_kernel_op_64,tmp_kernel_op_65);
+                const __m256d tmp_kernel_op_67 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_66)),_mm256_set_pd(tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24));
+                const __m256d tmp_kernel_op_68 = _mm256_mul_pd(tmp_kernel_op_62,tmp_kernel_op_67);
+                const __m256d tmp_kernel_op_69 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_66),_mm256_mul_pd(tmp_kernel_op_66,tmp_kernel_op_66));
+                const __m256d tmp_kernel_op_70 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29),tmp_kernel_op_63),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28),tmp_kernel_op_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4))),_mm256_set_pd(tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_71 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_68,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_64,tmp_kernel_op_69),tmp_kernel_op_70),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_72 = _mm256_mul_pd(tmp_kernel_op_63,tmp_kernel_op_67);
+                const __m256d tmp_kernel_op_73 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_69,tmp_kernel_op_70),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_74 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_68,_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_62,tmp_kernel_op_63),tmp_kernel_op_73),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_75 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_71,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_72,_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(tmp_kernel_op_65,tmp_kernel_op_73))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_72,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_62,tmp_kernel_op_63),tmp_kernel_op_69),tmp_kernel_op_70),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_76 = _mm256_mul_pd(tmp_kernel_op_71,tmp_kernel_op_75);
+                const __m256d tmp_kernel_op_77 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_74,tmp_kernel_op_75),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_78 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_76,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_77,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)));
+                const __m256d tmp_kernel_op_79 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_76,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_77,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                const __m256d tmp_kernel_op_80 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_12,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_40);
+                const __m256d tmp_kernel_op_81 = _mm256_mul_pd(tmp_kernel_op_80,tmp_kernel_op_80);
+                const __m256d tmp_kernel_op_82 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_43);
+                const __m256d tmp_kernel_op_83 = _mm256_mul_pd(tmp_kernel_op_82,tmp_kernel_op_82);
+                const __m256d tmp_kernel_op_84 = _mm256_add_pd(tmp_kernel_op_81,tmp_kernel_op_83);
+                const __m256d tmp_kernel_op_85 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_84)),_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48));
+                const __m256d tmp_kernel_op_86 = _mm256_mul_pd(tmp_kernel_op_80,tmp_kernel_op_85);
+                const __m256d tmp_kernel_op_87 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_82),_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_80),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)));
+                const __m256d tmp_kernel_op_88 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_84),_mm256_mul_pd(tmp_kernel_op_84,tmp_kernel_op_84)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_89 = _mm256_mul_pd(tmp_kernel_op_88,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_87,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_90 = _mm256_mul_pd(tmp_kernel_op_82,tmp_kernel_op_85);
+                const __m256d tmp_kernel_op_91 = _mm256_mul_pd(tmp_kernel_op_88,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_87,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_92 = _mm256_mul_pd(tmp_kernel_op_80,tmp_kernel_op_82);
+                const __m256d tmp_kernel_op_93 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_90,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(tmp_kernel_op_81,tmp_kernel_op_91)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_86,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_83,tmp_kernel_op_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_86,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_91,tmp_kernel_op_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_90,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(tmp_kernel_op_89,tmp_kernel_op_92))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_94 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_78,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_61,tmp_kernel_op_61,tmp_kernel_op_61,tmp_kernel_op_61)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_79,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_61,tmp_kernel_op_61,tmp_kernel_op_61,tmp_kernel_op_61))));
+                const __m256d tmp_kernel_op_98 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_6,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),p_affine_0_1);
+                const __m256d tmp_kernel_op_99 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_13,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),p_affine_0_0);
+                const __m256d tmp_kernel_op_100 = _mm256_mul_pd(tmp_kernel_op_99,tmp_kernel_op_99);
+                const __m256d tmp_kernel_op_101 = _mm256_mul_pd(tmp_kernel_op_98,tmp_kernel_op_98);
+                const __m256d tmp_kernel_op_102 = _mm256_add_pd(tmp_kernel_op_100,tmp_kernel_op_101);
+                const __m256d tmp_kernel_op_103 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_102)),_mm256_set_pd(tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24));
+                const __m256d tmp_kernel_op_104 = _mm256_mul_pd(tmp_kernel_op_103,tmp_kernel_op_98);
+                const __m256d tmp_kernel_op_105 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_102),_mm256_mul_pd(tmp_kernel_op_102,tmp_kernel_op_102));
+                const __m256d tmp_kernel_op_106 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29),tmp_kernel_op_99),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28),tmp_kernel_op_98),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4))),_mm256_set_pd(tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_107 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_104,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_105),tmp_kernel_op_106),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_108 = _mm256_mul_pd(tmp_kernel_op_103,tmp_kernel_op_99);
+                const __m256d tmp_kernel_op_109 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_106),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_110 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_104,_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_109,tmp_kernel_op_98),tmp_kernel_op_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                const __m256d tmp_kernel_op_111 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_107,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_108,_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(tmp_kernel_op_101,tmp_kernel_op_109))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_108,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_106),tmp_kernel_op_98),tmp_kernel_op_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_112 = _mm256_mul_pd(tmp_kernel_op_107,tmp_kernel_op_111);
+                const __m256d tmp_kernel_op_113 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_110,tmp_kernel_op_111),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_114 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_112,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_113,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)));
+                const __m256d tmp_kernel_op_115 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_112,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_113,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                const __m256d tmp_kernel_op_116 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_10,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_12,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),tmp_kernel_op_40);
+                const __m256d tmp_kernel_op_117 = _mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_116);
+                const __m256d tmp_kernel_op_118 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),tmp_kernel_op_43);
+                const __m256d tmp_kernel_op_119 = _mm256_mul_pd(tmp_kernel_op_118,tmp_kernel_op_118);
+                const __m256d tmp_kernel_op_120 = _mm256_add_pd(tmp_kernel_op_117,tmp_kernel_op_119);
+                const __m256d tmp_kernel_op_121 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_120)),_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48));
+                const __m256d tmp_kernel_op_122 = _mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_121);
+                const __m256d tmp_kernel_op_123 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_118),_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_116),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)));
+                const __m256d tmp_kernel_op_124 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_120),_mm256_mul_pd(tmp_kernel_op_120,tmp_kernel_op_120)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_125 = _mm256_mul_pd(tmp_kernel_op_124,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_123,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_126 = _mm256_mul_pd(tmp_kernel_op_118,tmp_kernel_op_121);
+                const __m256d tmp_kernel_op_127 = _mm256_mul_pd(tmp_kernel_op_124,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_123,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_128 = _mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_118);
+                const __m256d tmp_kernel_op_129 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_122,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_127,tmp_kernel_op_128),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_126,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(tmp_kernel_op_125,tmp_kernel_op_128))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_126,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(tmp_kernel_op_117,tmp_kernel_op_127)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_122,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_119,tmp_kernel_op_125),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_130 = _mm256_mul_pd(tmp_kernel_op_129,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_114,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_97,tmp_kernel_op_97,tmp_kernel_op_97,tmp_kernel_op_97)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_115,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_97,tmp_kernel_op_97,tmp_kernel_op_97,tmp_kernel_op_97))));
+                const __m256d tmp_kernel_op_134 = _mm256_mul_pd(tmp_kernel_op_38,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_135 = _mm256_mul_pd(tmp_kernel_op_134,tmp_kernel_op_57);
+                const __m256d tmp_kernel_op_136 = _mm256_mul_pd(tmp_kernel_op_78,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59)));
+                const __m256d tmp_kernel_op_137 = _mm256_mul_pd(tmp_kernel_op_136,tmp_kernel_op_93);
+                const __m256d tmp_kernel_op_138 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95)));
+                const __m256d tmp_kernel_op_139 = _mm256_mul_pd(tmp_kernel_op_129,tmp_kernel_op_138);
+                const __m256d tmp_kernel_op_140 = _mm256_mul_pd(tmp_kernel_op_57,_mm256_set_pd(tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131));
+                const __m256d tmp_kernel_op_141 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132));
+                const __m256d tmp_kernel_op_142 = _mm256_mul_pd(tmp_kernel_op_129,_mm256_set_pd(tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133));
+                const __m256d tmp_kernel_op_143 = _mm256_mul_pd(tmp_kernel_op_39,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)));
+                const __m256d tmp_kernel_op_144 = _mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_57);
+                const __m256d tmp_kernel_op_145 = _mm256_mul_pd(tmp_kernel_op_79,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60)));
+                const __m256d tmp_kernel_op_146 = _mm256_mul_pd(tmp_kernel_op_145,tmp_kernel_op_93);
+                const __m256d tmp_kernel_op_147 = _mm256_mul_pd(tmp_kernel_op_115,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96)));
+                const __m256d tmp_kernel_op_148 = _mm256_mul_pd(tmp_kernel_op_129,tmp_kernel_op_147);
+                const __m256d tmp_kernel_op_149 = _mm256_mul_pd(tmp_kernel_op_39,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0));
+                const __m256d tmp_kernel_op_150 = _mm256_mul_pd(tmp_kernel_op_38,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1));
+                const __m256d tmp_kernel_op_151 = _mm256_mul_pd(tmp_kernel_op_57,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_149,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_150,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_152 = _mm256_mul_pd(tmp_kernel_op_79,_mm256_set_pd(tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59));
+                const __m256d tmp_kernel_op_153 = _mm256_mul_pd(tmp_kernel_op_78,_mm256_set_pd(tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60));
+                const __m256d tmp_kernel_op_154 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_152,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_153,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_155 = _mm256_mul_pd(tmp_kernel_op_115,_mm256_set_pd(tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95));
+                const __m256d tmp_kernel_op_156 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_set_pd(tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96));
+                const __m256d tmp_kernel_op_157 = _mm256_mul_pd(tmp_kernel_op_129,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_155,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_156,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_158 = _mm256_mul_pd(tmp_kernel_op_57,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_39,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_set_pd(-1.333333333333333,-1.333333333333333,-1.333333333333333,-1.333333333333333))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_150));
+                const __m256d tmp_kernel_op_159 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_79,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_153));
+                const __m256d tmp_kernel_op_160 = _mm256_mul_pd(tmp_kernel_op_129,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_115,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_156));
+                const __m256d tmp_kernel_op_161 = _mm256_mul_pd(tmp_kernel_op_57,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_38,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_149));
+                const __m256d tmp_kernel_op_162 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_78,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60)),_mm256_set_pd(-1.333333333333333,-1.333333333333333,-1.333333333333333,-1.333333333333333))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_152));
+                const __m256d tmp_kernel_op_163 = _mm256_mul_pd(tmp_kernel_op_129,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_155));
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_58,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_94,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_94,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_58,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_58,_mm256_set_pd(tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131)),_mm256_mul_pd(tmp_kernel_op_94,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132))),_mm256_mul_pd(tmp_kernel_op_130,_mm256_set_pd(tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133)))));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_135,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_139,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_137,_mm256_set_pd(-0.66666666666666663,-0.66666666666666663,-0.66666666666666663,-0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_137,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_139,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_135,_mm256_set_pd(-0.66666666666666663,-0.66666666666666663,-0.66666666666666663,-0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_134,tmp_kernel_op_140),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_136,tmp_kernel_op_141),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_138,tmp_kernel_op_142),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_144,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_148,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_146,_mm256_set_pd(-0.66666666666666663,-0.66666666666666663,-0.66666666666666663,-0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_146,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_148,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_144,_mm256_set_pd(-0.66666666666666663,-0.66666666666666663,-0.66666666666666663,-0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_140,tmp_kernel_op_143),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_145),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_142,tmp_kernel_op_147),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_151,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_157,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_154,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_154,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_157,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_151,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_151,_mm256_set_pd(tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131)),_mm256_mul_pd(tmp_kernel_op_154,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132))),_mm256_mul_pd(tmp_kernel_op_157,_mm256_set_pd(tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133)))));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_158,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_160,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_159,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_159,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_160,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_158,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_158,_mm256_set_pd(tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131)),_mm256_mul_pd(tmp_kernel_op_159,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132))),_mm256_mul_pd(tmp_kernel_op_160,_mm256_set_pd(tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133)))));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_161,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_163,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_162,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_162,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_163,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_161,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_161,_mm256_set_pd(tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131)),_mm256_mul_pd(tmp_kernel_op_162,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132))),_mm256_mul_pd(tmp_kernel_op_163,_mm256_set_pd(tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133)))));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_1 = _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_2 = _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t tmp_kernel_op_5 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_kernel_op_6 = -tmp_kernel_op_5;
+                const real_t tmp_kernel_op_7 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_kernel_op_8 = -tmp_kernel_op_7;
+                const real_t tmp_kernel_op_9 = p_affine_0_1 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+                const real_t tmp_kernel_op_10 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_kernel_op_11 = -tmp_kernel_op_10;
+                const real_t tmp_kernel_op_12 = p_affine_0_0 - p_affine_2_0;
+                const real_t tmp_kernel_op_13 = -tmp_kernel_op_12;
+                const real_t tmp_kernel_op_14 = p_affine_0_0 + tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.66666666666666663;
+                const real_t tmp_kernel_op_15 = (tmp_kernel_op_14*tmp_kernel_op_14);
+                const real_t tmp_kernel_op_16 = (tmp_kernel_op_9*tmp_kernel_op_9);
+                const real_t tmp_kernel_op_17 = tmp_kernel_op_15 + tmp_kernel_op_16;
+                const real_t tmp_kernel_op_25 = pow(tmp_kernel_op_17, -0.50000000000000000)*tmp_kernel_op_24;
+                const real_t tmp_kernel_op_26 = tmp_kernel_op_25*tmp_kernel_op_9;
+                const real_t tmp_kernel_op_27 = pow(tmp_kernel_op_17, -1.5000000000000000);
+                const real_t tmp_kernel_op_30 = radRayVertex + tmp_kernel_op_23*(tmp_kernel_op_20*(tmp_kernel_op_14 + tmp_kernel_op_29) - tmp_kernel_op_4*(tmp_kernel_op_28 + tmp_kernel_op_9));
+                const real_t tmp_kernel_op_31 = tmp_kernel_op_15*tmp_kernel_op_27*tmp_kernel_op_30*1.0 - tmp_kernel_op_26*tmp_kernel_op_4;
+                const real_t tmp_kernel_op_32 = tmp_kernel_op_14*tmp_kernel_op_25;
+                const real_t tmp_kernel_op_33 = tmp_kernel_op_27*tmp_kernel_op_30*1.0;
+                const real_t tmp_kernel_op_34 = -tmp_kernel_op_14*tmp_kernel_op_33*tmp_kernel_op_9 + tmp_kernel_op_20*tmp_kernel_op_26;
+                const real_t tmp_kernel_op_35 = 1.0 / (tmp_kernel_op_31*(tmp_kernel_op_16*tmp_kernel_op_33 + tmp_kernel_op_20*tmp_kernel_op_32) - tmp_kernel_op_34*(-tmp_kernel_op_14*tmp_kernel_op_27*tmp_kernel_op_30*tmp_kernel_op_9 - tmp_kernel_op_32*tmp_kernel_op_4));
+                const real_t tmp_kernel_op_36 = tmp_kernel_op_31*tmp_kernel_op_35;
+                const real_t tmp_kernel_op_37 = -tmp_kernel_op_34*tmp_kernel_op_35;
+                const real_t tmp_kernel_op_38 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_36 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_37;
+                const real_t tmp_kernel_op_39 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_36 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_37;
+                const real_t tmp_kernel_op_40 = -p_affine_0_0;
+                const real_t tmp_kernel_op_41 = tmp_kernel_op_10*0.16666666666666666 + tmp_kernel_op_12*0.66666666666666663 + tmp_kernel_op_40;
+                const real_t tmp_kernel_op_42 = (tmp_kernel_op_41*tmp_kernel_op_41);
+                const real_t tmp_kernel_op_43 = -p_affine_0_1;
+                const real_t tmp_kernel_op_44 = tmp_kernel_op_43 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.66666666666666663;
+                const real_t tmp_kernel_op_45 = (tmp_kernel_op_44*tmp_kernel_op_44);
+                const real_t tmp_kernel_op_46 = tmp_kernel_op_42 + tmp_kernel_op_45;
+                const real_t tmp_kernel_op_49 = pow(tmp_kernel_op_46, -0.50000000000000000)*tmp_kernel_op_48;
+                const real_t tmp_kernel_op_50 = tmp_kernel_op_41*tmp_kernel_op_49;
+                const real_t tmp_kernel_op_51 = -tmp_kernel_op_19*(rayVertex_0 + tmp_kernel_op_41) + tmp_kernel_op_3*(rayVertex_1 + tmp_kernel_op_44);
+                const real_t tmp_kernel_op_52 = pow(tmp_kernel_op_46, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_53 = tmp_kernel_op_52*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_51);
+                const real_t tmp_kernel_op_54 = tmp_kernel_op_44*tmp_kernel_op_49;
+                const real_t tmp_kernel_op_55 = tmp_kernel_op_52*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_51);
+                const real_t tmp_kernel_op_56 = tmp_kernel_op_41*tmp_kernel_op_44;
+                const real_t tmp_kernel_op_57 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_50 - tmp_kernel_op_45*tmp_kernel_op_53)*(tmp_kernel_op_3*tmp_kernel_op_54 + tmp_kernel_op_42*tmp_kernel_op_55) - (tmp_kernel_op_19*tmp_kernel_op_54 + tmp_kernel_op_53*tmp_kernel_op_56)*(tmp_kernel_op_3*tmp_kernel_op_50 - tmp_kernel_op_55*tmp_kernel_op_56));
+                const real_t tmp_kernel_op_58 = tmp_kernel_op_57*(-tmp_kernel_op_2*tmp_kernel_op_38 - tmp_kernel_op_2*tmp_kernel_op_39);
+                const real_t tmp_kernel_op_62 = p_affine_0_1 + tmp_kernel_op_6*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+                const real_t tmp_kernel_op_63 = p_affine_0_0 + tmp_kernel_op_11*0.66666666666666663 + tmp_kernel_op_13*0.16666666666666666;
+                const real_t tmp_kernel_op_64 = (tmp_kernel_op_63*tmp_kernel_op_63);
+                const real_t tmp_kernel_op_65 = (tmp_kernel_op_62*tmp_kernel_op_62);
+                const real_t tmp_kernel_op_66 = tmp_kernel_op_64 + tmp_kernel_op_65;
+                const real_t tmp_kernel_op_67 = tmp_kernel_op_24*pow(tmp_kernel_op_66, -0.50000000000000000);
+                const real_t tmp_kernel_op_68 = tmp_kernel_op_62*tmp_kernel_op_67;
+                const real_t tmp_kernel_op_69 = pow(tmp_kernel_op_66, -1.5000000000000000);
+                const real_t tmp_kernel_op_70 = radRayVertex + tmp_kernel_op_23*(tmp_kernel_op_20*(tmp_kernel_op_29 + tmp_kernel_op_63) - tmp_kernel_op_4*(tmp_kernel_op_28 + tmp_kernel_op_62));
+                const real_t tmp_kernel_op_71 = -tmp_kernel_op_4*tmp_kernel_op_68 + tmp_kernel_op_64*tmp_kernel_op_69*tmp_kernel_op_70*1.0;
+                const real_t tmp_kernel_op_72 = tmp_kernel_op_63*tmp_kernel_op_67;
+                const real_t tmp_kernel_op_73 = tmp_kernel_op_69*tmp_kernel_op_70*1.0;
+                const real_t tmp_kernel_op_74 = tmp_kernel_op_20*tmp_kernel_op_68 - tmp_kernel_op_62*tmp_kernel_op_63*tmp_kernel_op_73;
+                const real_t tmp_kernel_op_75 = 1.0 / (tmp_kernel_op_71*(tmp_kernel_op_20*tmp_kernel_op_72 + tmp_kernel_op_65*tmp_kernel_op_73) - tmp_kernel_op_74*(-tmp_kernel_op_4*tmp_kernel_op_72 - tmp_kernel_op_62*tmp_kernel_op_63*tmp_kernel_op_69*tmp_kernel_op_70));
+                const real_t tmp_kernel_op_76 = tmp_kernel_op_71*tmp_kernel_op_75;
+                const real_t tmp_kernel_op_77 = -tmp_kernel_op_74*tmp_kernel_op_75;
+                const real_t tmp_kernel_op_78 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_76 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_77;
+                const real_t tmp_kernel_op_79 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_76 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_77;
+                const real_t tmp_kernel_op_80 = tmp_kernel_op_10*0.66666666666666663 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_40;
+                const real_t tmp_kernel_op_81 = (tmp_kernel_op_80*tmp_kernel_op_80);
+                const real_t tmp_kernel_op_82 = tmp_kernel_op_43 + tmp_kernel_op_5*0.66666666666666663 + tmp_kernel_op_7*0.16666666666666666;
+                const real_t tmp_kernel_op_83 = (tmp_kernel_op_82*tmp_kernel_op_82);
+                const real_t tmp_kernel_op_84 = tmp_kernel_op_81 + tmp_kernel_op_83;
+                const real_t tmp_kernel_op_85 = tmp_kernel_op_48*pow(tmp_kernel_op_84, -0.50000000000000000);
+                const real_t tmp_kernel_op_86 = tmp_kernel_op_80*tmp_kernel_op_85;
+                const real_t tmp_kernel_op_87 = -tmp_kernel_op_19*(rayVertex_0 + tmp_kernel_op_80) + tmp_kernel_op_3*(rayVertex_1 + tmp_kernel_op_82);
+                const real_t tmp_kernel_op_88 = pow(tmp_kernel_op_84, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_89 = tmp_kernel_op_88*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_87);
+                const real_t tmp_kernel_op_90 = tmp_kernel_op_82*tmp_kernel_op_85;
+                const real_t tmp_kernel_op_91 = tmp_kernel_op_88*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_87);
+                const real_t tmp_kernel_op_92 = tmp_kernel_op_80*tmp_kernel_op_82;
+                const real_t tmp_kernel_op_93 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_86 - tmp_kernel_op_83*tmp_kernel_op_89)*(tmp_kernel_op_3*tmp_kernel_op_90 + tmp_kernel_op_81*tmp_kernel_op_91) - (tmp_kernel_op_19*tmp_kernel_op_90 + tmp_kernel_op_89*tmp_kernel_op_92)*(tmp_kernel_op_3*tmp_kernel_op_86 - tmp_kernel_op_91*tmp_kernel_op_92));
+                const real_t tmp_kernel_op_94 = tmp_kernel_op_93*(-tmp_kernel_op_61*tmp_kernel_op_78 - tmp_kernel_op_61*tmp_kernel_op_79);
+                const real_t tmp_kernel_op_98 = p_affine_0_1 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+                const real_t tmp_kernel_op_99 = p_affine_0_0 + tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.16666666666666666;
+                const real_t tmp_kernel_op_100 = (tmp_kernel_op_99*tmp_kernel_op_99);
+                const real_t tmp_kernel_op_101 = (tmp_kernel_op_98*tmp_kernel_op_98);
+                const real_t tmp_kernel_op_102 = tmp_kernel_op_100 + tmp_kernel_op_101;
+                const real_t tmp_kernel_op_103 = pow(tmp_kernel_op_102, -0.50000000000000000)*tmp_kernel_op_24;
+                const real_t tmp_kernel_op_104 = tmp_kernel_op_103*tmp_kernel_op_98;
+                const real_t tmp_kernel_op_105 = pow(tmp_kernel_op_102, -1.5000000000000000);
+                const real_t tmp_kernel_op_106 = radRayVertex + tmp_kernel_op_23*(tmp_kernel_op_20*(tmp_kernel_op_29 + tmp_kernel_op_99) - tmp_kernel_op_4*(tmp_kernel_op_28 + tmp_kernel_op_98));
+                const real_t tmp_kernel_op_107 = tmp_kernel_op_100*tmp_kernel_op_105*tmp_kernel_op_106*1.0 - tmp_kernel_op_104*tmp_kernel_op_4;
+                const real_t tmp_kernel_op_108 = tmp_kernel_op_103*tmp_kernel_op_99;
+                const real_t tmp_kernel_op_109 = tmp_kernel_op_105*tmp_kernel_op_106*1.0;
+                const real_t tmp_kernel_op_110 = tmp_kernel_op_104*tmp_kernel_op_20 - tmp_kernel_op_109*tmp_kernel_op_98*tmp_kernel_op_99;
+                const real_t tmp_kernel_op_111 = 1.0 / (tmp_kernel_op_107*(tmp_kernel_op_101*tmp_kernel_op_109 + tmp_kernel_op_108*tmp_kernel_op_20) - tmp_kernel_op_110*(-tmp_kernel_op_105*tmp_kernel_op_106*tmp_kernel_op_98*tmp_kernel_op_99 - tmp_kernel_op_108*tmp_kernel_op_4));
+                const real_t tmp_kernel_op_112 = tmp_kernel_op_107*tmp_kernel_op_111;
+                const real_t tmp_kernel_op_113 = -tmp_kernel_op_110*tmp_kernel_op_111;
+                const real_t tmp_kernel_op_114 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_112 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_113;
+                const real_t tmp_kernel_op_115 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_112 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_113;
+                const real_t tmp_kernel_op_116 = tmp_kernel_op_10*0.16666666666666666 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_40;
+                const real_t tmp_kernel_op_117 = (tmp_kernel_op_116*tmp_kernel_op_116);
+                const real_t tmp_kernel_op_118 = tmp_kernel_op_43 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.16666666666666666;
+                const real_t tmp_kernel_op_119 = (tmp_kernel_op_118*tmp_kernel_op_118);
+                const real_t tmp_kernel_op_120 = tmp_kernel_op_117 + tmp_kernel_op_119;
+                const real_t tmp_kernel_op_121 = pow(tmp_kernel_op_120, -0.50000000000000000)*tmp_kernel_op_48;
+                const real_t tmp_kernel_op_122 = tmp_kernel_op_116*tmp_kernel_op_121;
+                const real_t tmp_kernel_op_123 = -tmp_kernel_op_19*(rayVertex_0 + tmp_kernel_op_116) + tmp_kernel_op_3*(rayVertex_1 + tmp_kernel_op_118);
+                const real_t tmp_kernel_op_124 = pow(tmp_kernel_op_120, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_125 = tmp_kernel_op_124*(radRayVertex + tmp_kernel_op_123*tmp_kernel_op_47);
+                const real_t tmp_kernel_op_126 = tmp_kernel_op_118*tmp_kernel_op_121;
+                const real_t tmp_kernel_op_127 = tmp_kernel_op_124*(radRayVertex + tmp_kernel_op_123*tmp_kernel_op_47);
+                const real_t tmp_kernel_op_128 = tmp_kernel_op_116*tmp_kernel_op_118;
+                const real_t tmp_kernel_op_129 = abs_det_jac_affine_BLUE*0.16666666666666666*abs(-(tmp_kernel_op_117*tmp_kernel_op_127 + tmp_kernel_op_126*tmp_kernel_op_3)*(-tmp_kernel_op_119*tmp_kernel_op_125 + tmp_kernel_op_122*tmp_kernel_op_19) + (tmp_kernel_op_122*tmp_kernel_op_3 - tmp_kernel_op_127*tmp_kernel_op_128)*(tmp_kernel_op_125*tmp_kernel_op_128 + tmp_kernel_op_126*tmp_kernel_op_19));
+                const real_t tmp_kernel_op_130 = tmp_kernel_op_129*(-tmp_kernel_op_114*tmp_kernel_op_97 - tmp_kernel_op_115*tmp_kernel_op_97);
+                const real_t tmp_kernel_op_134 = tmp_kernel_op_38*(tmp_kernel_op_0 - 1.0);
+                const real_t tmp_kernel_op_135 = tmp_kernel_op_134*tmp_kernel_op_57;
+                const real_t tmp_kernel_op_136 = tmp_kernel_op_78*(tmp_kernel_op_59 - 1.0);
+                const real_t tmp_kernel_op_137 = tmp_kernel_op_136*tmp_kernel_op_93;
+                const real_t tmp_kernel_op_138 = tmp_kernel_op_114*(tmp_kernel_op_95 - 1.0);
+                const real_t tmp_kernel_op_139 = tmp_kernel_op_129*tmp_kernel_op_138;
+                const real_t tmp_kernel_op_140 = tmp_kernel_op_131*tmp_kernel_op_57;
+                const real_t tmp_kernel_op_141 = tmp_kernel_op_132*tmp_kernel_op_93;
+                const real_t tmp_kernel_op_142 = tmp_kernel_op_129*tmp_kernel_op_133;
+                const real_t tmp_kernel_op_143 = tmp_kernel_op_39*(tmp_kernel_op_1 - 1.0);
+                const real_t tmp_kernel_op_144 = tmp_kernel_op_143*tmp_kernel_op_57;
+                const real_t tmp_kernel_op_145 = tmp_kernel_op_79*(tmp_kernel_op_60 - 1.0);
+                const real_t tmp_kernel_op_146 = tmp_kernel_op_145*tmp_kernel_op_93;
+                const real_t tmp_kernel_op_147 = tmp_kernel_op_115*(tmp_kernel_op_96 - 1.0);
+                const real_t tmp_kernel_op_148 = tmp_kernel_op_129*tmp_kernel_op_147;
+                const real_t tmp_kernel_op_149 = tmp_kernel_op_0*tmp_kernel_op_39;
+                const real_t tmp_kernel_op_150 = tmp_kernel_op_1*tmp_kernel_op_38;
+                const real_t tmp_kernel_op_151 = tmp_kernel_op_57*(-tmp_kernel_op_149 - tmp_kernel_op_150);
+                const real_t tmp_kernel_op_152 = tmp_kernel_op_59*tmp_kernel_op_79;
+                const real_t tmp_kernel_op_153 = tmp_kernel_op_60*tmp_kernel_op_78;
+                const real_t tmp_kernel_op_154 = tmp_kernel_op_93*(-tmp_kernel_op_152 - tmp_kernel_op_153);
+                const real_t tmp_kernel_op_155 = tmp_kernel_op_115*tmp_kernel_op_95;
+                const real_t tmp_kernel_op_156 = tmp_kernel_op_114*tmp_kernel_op_96;
+                const real_t tmp_kernel_op_157 = tmp_kernel_op_129*(-tmp_kernel_op_155 - tmp_kernel_op_156);
+                const real_t tmp_kernel_op_158 = tmp_kernel_op_57*(tmp_kernel_op_150 - tmp_kernel_op_39*(-tmp_kernel_op_0 - 1.333333333333333));
+                const real_t tmp_kernel_op_159 = tmp_kernel_op_93*(tmp_kernel_op_153 - tmp_kernel_op_79*(-tmp_kernel_op_59 + 2.666666666666667));
+                const real_t tmp_kernel_op_160 = tmp_kernel_op_129*(-tmp_kernel_op_115*(-tmp_kernel_op_95 + 2.666666666666667) + tmp_kernel_op_156);
+                const real_t tmp_kernel_op_161 = tmp_kernel_op_57*(tmp_kernel_op_149 - tmp_kernel_op_38*(-tmp_kernel_op_1 + 2.666666666666667));
+                const real_t tmp_kernel_op_162 = tmp_kernel_op_93*(tmp_kernel_op_152 - tmp_kernel_op_78*(-tmp_kernel_op_60 - 1.333333333333333));
+                const real_t tmp_kernel_op_163 = tmp_kernel_op_129*(-tmp_kernel_op_114*(-tmp_kernel_op_96 + 2.666666666666667) + tmp_kernel_op_155);
+                const real_t elMatVec_0 = src_dof_0*(tmp_kernel_op_130*tmp_kernel_op_133 + tmp_kernel_op_131*tmp_kernel_op_58 + tmp_kernel_op_132*tmp_kernel_op_94) + src_dof_1*(tmp_kernel_op_130*0.16666666666666666 + tmp_kernel_op_58*0.16666666666666666 + tmp_kernel_op_94*0.66666666666666663) + src_dof_2*(tmp_kernel_op_130*0.16666666666666666 + tmp_kernel_op_58*0.66666666666666663 + tmp_kernel_op_94*0.16666666666666666);
+                const real_t elMatVec_1 = src_dof_0*(-tmp_kernel_op_134*tmp_kernel_op_140 - tmp_kernel_op_136*tmp_kernel_op_141 - tmp_kernel_op_138*tmp_kernel_op_142) + src_dof_1*(tmp_kernel_op_135*-0.16666666666666666 + tmp_kernel_op_137*-0.66666666666666663 + tmp_kernel_op_139*-0.16666666666666666) + src_dof_2*(tmp_kernel_op_135*-0.66666666666666663 + tmp_kernel_op_137*-0.16666666666666666 + tmp_kernel_op_139*-0.16666666666666666);
+                const real_t elMatVec_2 = src_dof_0*(-tmp_kernel_op_140*tmp_kernel_op_143 - tmp_kernel_op_141*tmp_kernel_op_145 - tmp_kernel_op_142*tmp_kernel_op_147) + src_dof_1*(tmp_kernel_op_144*-0.16666666666666666 + tmp_kernel_op_146*-0.66666666666666663 + tmp_kernel_op_148*-0.16666666666666666) + src_dof_2*(tmp_kernel_op_144*-0.66666666666666663 + tmp_kernel_op_146*-0.16666666666666666 + tmp_kernel_op_148*-0.16666666666666666);
+                const real_t elMatVec_3 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_151 + tmp_kernel_op_132*tmp_kernel_op_154 + tmp_kernel_op_133*tmp_kernel_op_157) + src_dof_1*(tmp_kernel_op_151*0.16666666666666666 + tmp_kernel_op_154*0.66666666666666663 + tmp_kernel_op_157*0.16666666666666666) + src_dof_2*(tmp_kernel_op_151*0.66666666666666663 + tmp_kernel_op_154*0.16666666666666666 + tmp_kernel_op_157*0.16666666666666666);
+                const real_t elMatVec_4 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_158 + tmp_kernel_op_132*tmp_kernel_op_159 + tmp_kernel_op_133*tmp_kernel_op_160) + src_dof_1*(tmp_kernel_op_158*0.16666666666666666 + tmp_kernel_op_159*0.66666666666666663 + tmp_kernel_op_160*0.16666666666666666) + src_dof_2*(tmp_kernel_op_158*0.66666666666666663 + tmp_kernel_op_159*0.16666666666666666 + tmp_kernel_op_160*0.16666666666666666);
+                const real_t elMatVec_5 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_161 + tmp_kernel_op_132*tmp_kernel_op_162 + tmp_kernel_op_133*tmp_kernel_op_163) + src_dof_1*(tmp_kernel_op_161*0.16666666666666666 + tmp_kernel_op_162*0.66666666666666663 + tmp_kernel_op_163*0.16666666666666666) + src_dof_2*(tmp_kernel_op_161*0.66666666666666663 + tmp_kernel_op_162*0.16666666666666666 + tmp_kernel_op_163*0.16666666666666666);
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..af45c3486a304a22febd0c37697e03337fb4cada
--- /dev/null
+++ b/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp
@@ -0,0 +1,849 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P1ToP2ElementwiseGradientAnnulusMap_1_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P1ToP2ElementwiseGradientAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_kernel_op_0 = 0.66666666666666663;
+       const real_t tmp_kernel_op_1 = 2.6666666666666665;
+       const real_t tmp_kernel_op_2 = tmp_kernel_op_0 + tmp_kernel_op_1 - 3.0;
+       const real_t tmp_kernel_op_3 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_kernel_op_4 = -tmp_kernel_op_3;
+       const real_t tmp_kernel_op_18 = rayVertex_0 - refVertex_0;
+       const real_t tmp_kernel_op_19 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_kernel_op_20 = -tmp_kernel_op_19;
+       const real_t tmp_kernel_op_21 = rayVertex_1 - refVertex_1;
+       const real_t tmp_kernel_op_22 = radRayVertex - radRefVertex;
+       const real_t tmp_kernel_op_23 = -tmp_kernel_op_22*1.0 / (-tmp_kernel_op_18*tmp_kernel_op_4 + tmp_kernel_op_20*tmp_kernel_op_21);
+       const real_t tmp_kernel_op_24 = tmp_kernel_op_23*1.0;
+       const real_t tmp_kernel_op_28 = -rayVertex_1;
+       const real_t tmp_kernel_op_29 = -rayVertex_0;
+       const real_t tmp_kernel_op_47 = tmp_kernel_op_22*1.0 / (tmp_kernel_op_18*tmp_kernel_op_3 - tmp_kernel_op_19*tmp_kernel_op_21);
+       const real_t tmp_kernel_op_48 = tmp_kernel_op_47*1.0;
+       const real_t tmp_kernel_op_59 = 2.6666666666666665;
+       const real_t tmp_kernel_op_60 = 0.66666666666666663;
+       const real_t tmp_kernel_op_61 = tmp_kernel_op_59 + tmp_kernel_op_60 - 3.0;
+       const real_t tmp_kernel_op_95 = 0.66666666666666663;
+       const real_t tmp_kernel_op_96 = 0.66666666666666663;
+       const real_t tmp_kernel_op_97 = tmp_kernel_op_95 + tmp_kernel_op_96 - 3.0;
+       const real_t tmp_kernel_op_131 = 0.16666666666666674;
+       const real_t tmp_kernel_op_132 = 0.16666666666666671;
+       const real_t tmp_kernel_op_133 = 0.66666666666666674;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d tmp_kernel_op_5 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_6 = _mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_7 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_8 = _mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_9 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_6,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_0);
+                const __m256d tmp_kernel_op_10 = _mm256_mul_pd(tmp_kernel_op_9,tmp_kernel_op_9);
+                const __m256d tmp_kernel_op_11 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_12 = _mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_13 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_14 = _mm256_mul_pd(tmp_kernel_op_13,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_15 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_12,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_14,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_1);
+                const __m256d tmp_kernel_op_16 = _mm256_mul_pd(tmp_kernel_op_15,tmp_kernel_op_15);
+                const __m256d tmp_kernel_op_17 = _mm256_add_pd(tmp_kernel_op_10,tmp_kernel_op_16);
+                const __m256d tmp_kernel_op_25 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_17)),_mm256_set_pd(tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24));
+                const __m256d tmp_kernel_op_26 = _mm256_mul_pd(tmp_kernel_op_25,tmp_kernel_op_9);
+                const __m256d tmp_kernel_op_27 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_17),_mm256_mul_pd(tmp_kernel_op_17,tmp_kernel_op_17));
+                const __m256d tmp_kernel_op_30 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29),tmp_kernel_op_9),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28),tmp_kernel_op_15),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20))),_mm256_set_pd(tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_31 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_27,tmp_kernel_op_30),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_32 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_26,_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(tmp_kernel_op_16,tmp_kernel_op_31));
+                const __m256d tmp_kernel_op_33 = _mm256_mul_pd(tmp_kernel_op_15,tmp_kernel_op_25);
+                const __m256d tmp_kernel_op_34 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_26,_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_15,tmp_kernel_op_27),tmp_kernel_op_30),tmp_kernel_op_9),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_35 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_33,_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_15,tmp_kernel_op_31),tmp_kernel_op_9),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(tmp_kernel_op_32,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_10,tmp_kernel_op_27),tmp_kernel_op_30),_mm256_set_pd(1.0,1.0,1.0,1.0))))));
+                const __m256d tmp_kernel_op_36 = _mm256_mul_pd(tmp_kernel_op_32,tmp_kernel_op_35);
+                const __m256d tmp_kernel_op_37 = _mm256_mul_pd(tmp_kernel_op_34,tmp_kernel_op_35);
+                const __m256d tmp_kernel_op_38 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_37,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_36,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)));
+                const __m256d tmp_kernel_op_39 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_37,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_36,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                const __m256d tmp_kernel_op_40 = _mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_41 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_40);
+                const __m256d tmp_kernel_op_42 = _mm256_mul_pd(tmp_kernel_op_41,tmp_kernel_op_41);
+                const __m256d tmp_kernel_op_43 = _mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_44 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_13,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_43);
+                const __m256d tmp_kernel_op_45 = _mm256_mul_pd(tmp_kernel_op_44,tmp_kernel_op_44);
+                const __m256d tmp_kernel_op_46 = _mm256_add_pd(tmp_kernel_op_42,tmp_kernel_op_45);
+                const __m256d tmp_kernel_op_49 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_46)),_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48));
+                const __m256d tmp_kernel_op_50 = _mm256_mul_pd(tmp_kernel_op_41,tmp_kernel_op_49);
+                const __m256d tmp_kernel_op_51 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_44),_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_41),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)));
+                const __m256d tmp_kernel_op_52 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_46),_mm256_mul_pd(tmp_kernel_op_46,tmp_kernel_op_46)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_53 = _mm256_mul_pd(tmp_kernel_op_52,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_51,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_54 = _mm256_mul_pd(tmp_kernel_op_44,tmp_kernel_op_49);
+                const __m256d tmp_kernel_op_55 = _mm256_mul_pd(tmp_kernel_op_52,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_51,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_56 = _mm256_mul_pd(tmp_kernel_op_41,tmp_kernel_op_44);
+                const __m256d tmp_kernel_op_57 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_50,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_55,tmp_kernel_op_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_54,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(tmp_kernel_op_53,tmp_kernel_op_56))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_54,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(tmp_kernel_op_42,tmp_kernel_op_55)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_50,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_45,tmp_kernel_op_53),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_58 = _mm256_mul_pd(tmp_kernel_op_57,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_2,tmp_kernel_op_2,tmp_kernel_op_2,tmp_kernel_op_2)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_39,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_2,tmp_kernel_op_2,tmp_kernel_op_2,tmp_kernel_op_2))));
+                const __m256d tmp_kernel_op_62 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_6,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_0);
+                const __m256d tmp_kernel_op_63 = _mm256_mul_pd(tmp_kernel_op_62,tmp_kernel_op_62);
+                const __m256d tmp_kernel_op_64 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_14,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_12,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_1);
+                const __m256d tmp_kernel_op_65 = _mm256_mul_pd(tmp_kernel_op_64,tmp_kernel_op_64);
+                const __m256d tmp_kernel_op_66 = _mm256_add_pd(tmp_kernel_op_63,tmp_kernel_op_65);
+                const __m256d tmp_kernel_op_67 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_66)),_mm256_set_pd(tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24));
+                const __m256d tmp_kernel_op_68 = _mm256_mul_pd(tmp_kernel_op_62,tmp_kernel_op_67);
+                const __m256d tmp_kernel_op_69 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_66),_mm256_mul_pd(tmp_kernel_op_66,tmp_kernel_op_66));
+                const __m256d tmp_kernel_op_70 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29),tmp_kernel_op_62),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28),tmp_kernel_op_64),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20))),_mm256_set_pd(tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_71 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_69,tmp_kernel_op_70),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_72 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_68,_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(tmp_kernel_op_65,tmp_kernel_op_71));
+                const __m256d tmp_kernel_op_73 = _mm256_mul_pd(tmp_kernel_op_64,tmp_kernel_op_67);
+                const __m256d tmp_kernel_op_74 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_68,_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_62,tmp_kernel_op_64),tmp_kernel_op_69),tmp_kernel_op_70),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_75 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_73,_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_62,tmp_kernel_op_64),tmp_kernel_op_71),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(tmp_kernel_op_72,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_73,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_63,tmp_kernel_op_69),tmp_kernel_op_70),_mm256_set_pd(1.0,1.0,1.0,1.0))))));
+                const __m256d tmp_kernel_op_76 = _mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_75);
+                const __m256d tmp_kernel_op_77 = _mm256_mul_pd(tmp_kernel_op_74,tmp_kernel_op_75);
+                const __m256d tmp_kernel_op_78 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_77,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_76,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)));
+                const __m256d tmp_kernel_op_79 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_77,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_76,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                const __m256d tmp_kernel_op_80 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_40);
+                const __m256d tmp_kernel_op_81 = _mm256_mul_pd(tmp_kernel_op_80,tmp_kernel_op_80);
+                const __m256d tmp_kernel_op_82 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_13,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_43);
+                const __m256d tmp_kernel_op_83 = _mm256_mul_pd(tmp_kernel_op_82,tmp_kernel_op_82);
+                const __m256d tmp_kernel_op_84 = _mm256_add_pd(tmp_kernel_op_81,tmp_kernel_op_83);
+                const __m256d tmp_kernel_op_85 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_84)),_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48));
+                const __m256d tmp_kernel_op_86 = _mm256_mul_pd(tmp_kernel_op_80,tmp_kernel_op_85);
+                const __m256d tmp_kernel_op_87 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_82),_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_80),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)));
+                const __m256d tmp_kernel_op_88 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_84),_mm256_mul_pd(tmp_kernel_op_84,tmp_kernel_op_84)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_89 = _mm256_mul_pd(tmp_kernel_op_88,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_87,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_90 = _mm256_mul_pd(tmp_kernel_op_82,tmp_kernel_op_85);
+                const __m256d tmp_kernel_op_91 = _mm256_mul_pd(tmp_kernel_op_88,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_87,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_92 = _mm256_mul_pd(tmp_kernel_op_80,tmp_kernel_op_82);
+                const __m256d tmp_kernel_op_93 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_86,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_91,tmp_kernel_op_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_90,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(tmp_kernel_op_89,tmp_kernel_op_92))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_90,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(tmp_kernel_op_81,tmp_kernel_op_91)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_86,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_83,tmp_kernel_op_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_94 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_78,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_61,tmp_kernel_op_61,tmp_kernel_op_61,tmp_kernel_op_61)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_79,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_61,tmp_kernel_op_61,tmp_kernel_op_61,tmp_kernel_op_61))));
+                const __m256d tmp_kernel_op_98 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_6,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),p_affine_0_0);
+                const __m256d tmp_kernel_op_99 = _mm256_mul_pd(tmp_kernel_op_98,tmp_kernel_op_98);
+                const __m256d tmp_kernel_op_100 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_12,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_14,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),p_affine_0_1);
+                const __m256d tmp_kernel_op_101 = _mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_100);
+                const __m256d tmp_kernel_op_102 = _mm256_add_pd(tmp_kernel_op_101,tmp_kernel_op_99);
+                const __m256d tmp_kernel_op_103 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_102)),_mm256_set_pd(tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24));
+                const __m256d tmp_kernel_op_104 = _mm256_mul_pd(tmp_kernel_op_103,tmp_kernel_op_98);
+                const __m256d tmp_kernel_op_105 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_102),_mm256_mul_pd(tmp_kernel_op_102,tmp_kernel_op_102));
+                const __m256d tmp_kernel_op_106 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29),tmp_kernel_op_98),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28),tmp_kernel_op_100),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20))),_mm256_set_pd(tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_107 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_106),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_108 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_104,_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(tmp_kernel_op_101,tmp_kernel_op_107));
+                const __m256d tmp_kernel_op_109 = _mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_103);
+                const __m256d tmp_kernel_op_110 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_104,_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_105),tmp_kernel_op_106),tmp_kernel_op_98),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_111 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_109,_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_107),tmp_kernel_op_98),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(tmp_kernel_op_108,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_109,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_106),tmp_kernel_op_99),_mm256_set_pd(1.0,1.0,1.0,1.0))))));
+                const __m256d tmp_kernel_op_112 = _mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_111);
+                const __m256d tmp_kernel_op_113 = _mm256_mul_pd(tmp_kernel_op_110,tmp_kernel_op_111);
+                const __m256d tmp_kernel_op_114 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_113,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_112,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)));
+                const __m256d tmp_kernel_op_115 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_113,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(tmp_kernel_op_112,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)));
+                const __m256d tmp_kernel_op_116 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),tmp_kernel_op_40);
+                const __m256d tmp_kernel_op_117 = _mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_116);
+                const __m256d tmp_kernel_op_118 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_13,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),tmp_kernel_op_43);
+                const __m256d tmp_kernel_op_119 = _mm256_mul_pd(tmp_kernel_op_118,tmp_kernel_op_118);
+                const __m256d tmp_kernel_op_120 = _mm256_add_pd(tmp_kernel_op_117,tmp_kernel_op_119);
+                const __m256d tmp_kernel_op_121 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_120)),_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48));
+                const __m256d tmp_kernel_op_122 = _mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_121);
+                const __m256d tmp_kernel_op_123 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_118),_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_116),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)));
+                const __m256d tmp_kernel_op_124 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_120),_mm256_mul_pd(tmp_kernel_op_120,tmp_kernel_op_120)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_125 = _mm256_mul_pd(tmp_kernel_op_124,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_123,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_126 = _mm256_mul_pd(tmp_kernel_op_118,tmp_kernel_op_121);
+                const __m256d tmp_kernel_op_127 = _mm256_mul_pd(tmp_kernel_op_124,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_123,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_128 = _mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_118);
+                const __m256d tmp_kernel_op_129 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_122,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_127,tmp_kernel_op_128),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_126,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(tmp_kernel_op_125,tmp_kernel_op_128))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_126,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(tmp_kernel_op_117,tmp_kernel_op_127)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_122,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_119,tmp_kernel_op_125),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_130 = _mm256_mul_pd(tmp_kernel_op_129,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_114,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_97,tmp_kernel_op_97,tmp_kernel_op_97,tmp_kernel_op_97)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_115,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_97,tmp_kernel_op_97,tmp_kernel_op_97,tmp_kernel_op_97))));
+                const __m256d tmp_kernel_op_134 = _mm256_mul_pd(tmp_kernel_op_38,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_135 = _mm256_mul_pd(tmp_kernel_op_134,tmp_kernel_op_57);
+                const __m256d tmp_kernel_op_136 = _mm256_mul_pd(tmp_kernel_op_78,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59)));
+                const __m256d tmp_kernel_op_137 = _mm256_mul_pd(tmp_kernel_op_136,tmp_kernel_op_93);
+                const __m256d tmp_kernel_op_138 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95)));
+                const __m256d tmp_kernel_op_139 = _mm256_mul_pd(tmp_kernel_op_129,tmp_kernel_op_138);
+                const __m256d tmp_kernel_op_140 = _mm256_mul_pd(tmp_kernel_op_57,_mm256_set_pd(tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131));
+                const __m256d tmp_kernel_op_141 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132));
+                const __m256d tmp_kernel_op_142 = _mm256_mul_pd(tmp_kernel_op_129,_mm256_set_pd(tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133));
+                const __m256d tmp_kernel_op_143 = _mm256_mul_pd(tmp_kernel_op_39,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)));
+                const __m256d tmp_kernel_op_144 = _mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_57);
+                const __m256d tmp_kernel_op_145 = _mm256_mul_pd(tmp_kernel_op_79,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60)));
+                const __m256d tmp_kernel_op_146 = _mm256_mul_pd(tmp_kernel_op_145,tmp_kernel_op_93);
+                const __m256d tmp_kernel_op_147 = _mm256_mul_pd(tmp_kernel_op_115,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96)));
+                const __m256d tmp_kernel_op_148 = _mm256_mul_pd(tmp_kernel_op_129,tmp_kernel_op_147);
+                const __m256d tmp_kernel_op_149 = _mm256_mul_pd(tmp_kernel_op_39,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0));
+                const __m256d tmp_kernel_op_150 = _mm256_mul_pd(tmp_kernel_op_38,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1));
+                const __m256d tmp_kernel_op_151 = _mm256_mul_pd(tmp_kernel_op_57,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_149,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_150,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_152 = _mm256_mul_pd(tmp_kernel_op_79,_mm256_set_pd(tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59));
+                const __m256d tmp_kernel_op_153 = _mm256_mul_pd(tmp_kernel_op_78,_mm256_set_pd(tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60));
+                const __m256d tmp_kernel_op_154 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_152,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_153,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_155 = _mm256_mul_pd(tmp_kernel_op_115,_mm256_set_pd(tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95));
+                const __m256d tmp_kernel_op_156 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_set_pd(tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96));
+                const __m256d tmp_kernel_op_157 = _mm256_mul_pd(tmp_kernel_op_129,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_155,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_156,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_158 = _mm256_mul_pd(tmp_kernel_op_57,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_39,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_set_pd(-1.333333333333333,-1.333333333333333,-1.333333333333333,-1.333333333333333))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_150));
+                const __m256d tmp_kernel_op_159 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_79,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_153));
+                const __m256d tmp_kernel_op_160 = _mm256_mul_pd(tmp_kernel_op_129,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_115,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_156));
+                const __m256d tmp_kernel_op_161 = _mm256_mul_pd(tmp_kernel_op_57,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_38,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_149));
+                const __m256d tmp_kernel_op_162 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_78,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60)),_mm256_set_pd(-1.333333333333333,-1.333333333333333,-1.333333333333333,-1.333333333333333))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_152));
+                const __m256d tmp_kernel_op_163 = _mm256_mul_pd(tmp_kernel_op_129,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_155));
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_58,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_94,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_94,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_58,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_58,_mm256_set_pd(tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131)),_mm256_mul_pd(tmp_kernel_op_94,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132))),_mm256_mul_pd(tmp_kernel_op_130,_mm256_set_pd(tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133)))));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_135,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_139,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_137,_mm256_set_pd(-0.66666666666666663,-0.66666666666666663,-0.66666666666666663,-0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_137,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_139,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_135,_mm256_set_pd(-0.66666666666666663,-0.66666666666666663,-0.66666666666666663,-0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_134,tmp_kernel_op_140),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_136,tmp_kernel_op_141),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_138,tmp_kernel_op_142),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_144,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_148,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_146,_mm256_set_pd(-0.66666666666666663,-0.66666666666666663,-0.66666666666666663,-0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_146,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_148,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_144,_mm256_set_pd(-0.66666666666666663,-0.66666666666666663,-0.66666666666666663,-0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_140,tmp_kernel_op_143),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_145),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_142,tmp_kernel_op_147),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_151,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_157,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_154,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_154,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_157,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_151,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_151,_mm256_set_pd(tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131)),_mm256_mul_pd(tmp_kernel_op_154,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132))),_mm256_mul_pd(tmp_kernel_op_157,_mm256_set_pd(tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133)))));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_158,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_160,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_159,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_159,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_160,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_158,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_158,_mm256_set_pd(tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131)),_mm256_mul_pd(tmp_kernel_op_159,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132))),_mm256_mul_pd(tmp_kernel_op_160,_mm256_set_pd(tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133)))));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_161,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_163,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_162,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_162,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_163,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_161,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_161,_mm256_set_pd(tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131)),_mm256_mul_pd(tmp_kernel_op_162,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132))),_mm256_mul_pd(tmp_kernel_op_163,_mm256_set_pd(tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133)))));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t src_dof_1 = _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_2 = _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t tmp_kernel_op_5 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_kernel_op_6 = -tmp_kernel_op_5;
+                const real_t tmp_kernel_op_7 = p_affine_0_0 - p_affine_2_0;
+                const real_t tmp_kernel_op_8 = -tmp_kernel_op_7;
+                const real_t tmp_kernel_op_9 = p_affine_0_0 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+                const real_t tmp_kernel_op_10 = (tmp_kernel_op_9*tmp_kernel_op_9);
+                const real_t tmp_kernel_op_11 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_kernel_op_12 = -tmp_kernel_op_11;
+                const real_t tmp_kernel_op_13 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_kernel_op_14 = -tmp_kernel_op_13;
+                const real_t tmp_kernel_op_15 = p_affine_0_1 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_14*0.66666666666666663;
+                const real_t tmp_kernel_op_16 = (tmp_kernel_op_15*tmp_kernel_op_15);
+                const real_t tmp_kernel_op_17 = tmp_kernel_op_10 + tmp_kernel_op_16;
+                const real_t tmp_kernel_op_25 = pow(tmp_kernel_op_17, -0.50000000000000000)*tmp_kernel_op_24;
+                const real_t tmp_kernel_op_26 = tmp_kernel_op_25*tmp_kernel_op_9;
+                const real_t tmp_kernel_op_27 = pow(tmp_kernel_op_17, -1.5000000000000000);
+                const real_t tmp_kernel_op_30 = radRayVertex + tmp_kernel_op_23*(-tmp_kernel_op_20*(tmp_kernel_op_15 + tmp_kernel_op_28) + tmp_kernel_op_4*(tmp_kernel_op_29 + tmp_kernel_op_9));
+                const real_t tmp_kernel_op_31 = tmp_kernel_op_27*tmp_kernel_op_30*1.0;
+                const real_t tmp_kernel_op_32 = tmp_kernel_op_16*tmp_kernel_op_31 + tmp_kernel_op_26*tmp_kernel_op_4;
+                const real_t tmp_kernel_op_33 = tmp_kernel_op_15*tmp_kernel_op_25;
+                const real_t tmp_kernel_op_34 = tmp_kernel_op_15*tmp_kernel_op_27*tmp_kernel_op_30*tmp_kernel_op_9*1.0 + tmp_kernel_op_20*tmp_kernel_op_26;
+                const real_t tmp_kernel_op_35 = 1.0 / (tmp_kernel_op_32*(tmp_kernel_op_10*tmp_kernel_op_27*tmp_kernel_op_30*1.0 - tmp_kernel_op_20*tmp_kernel_op_33) + tmp_kernel_op_34*(-tmp_kernel_op_15*tmp_kernel_op_31*tmp_kernel_op_9 + tmp_kernel_op_33*tmp_kernel_op_4));
+                const real_t tmp_kernel_op_36 = tmp_kernel_op_32*tmp_kernel_op_35;
+                const real_t tmp_kernel_op_37 = tmp_kernel_op_34*tmp_kernel_op_35;
+                const real_t tmp_kernel_op_38 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_37 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_36;
+                const real_t tmp_kernel_op_39 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_37 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_36;
+                const real_t tmp_kernel_op_40 = -p_affine_0_0;
+                const real_t tmp_kernel_op_41 = tmp_kernel_op_40 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.66666666666666663;
+                const real_t tmp_kernel_op_42 = (tmp_kernel_op_41*tmp_kernel_op_41);
+                const real_t tmp_kernel_op_43 = -p_affine_0_1;
+                const real_t tmp_kernel_op_44 = tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.66666666666666663 + tmp_kernel_op_43;
+                const real_t tmp_kernel_op_45 = (tmp_kernel_op_44*tmp_kernel_op_44);
+                const real_t tmp_kernel_op_46 = tmp_kernel_op_42 + tmp_kernel_op_45;
+                const real_t tmp_kernel_op_49 = pow(tmp_kernel_op_46, -0.50000000000000000)*tmp_kernel_op_48;
+                const real_t tmp_kernel_op_50 = tmp_kernel_op_41*tmp_kernel_op_49;
+                const real_t tmp_kernel_op_51 = tmp_kernel_op_19*(rayVertex_1 + tmp_kernel_op_44) - tmp_kernel_op_3*(rayVertex_0 + tmp_kernel_op_41);
+                const real_t tmp_kernel_op_52 = pow(tmp_kernel_op_46, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_53 = tmp_kernel_op_52*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_51);
+                const real_t tmp_kernel_op_54 = tmp_kernel_op_44*tmp_kernel_op_49;
+                const real_t tmp_kernel_op_55 = tmp_kernel_op_52*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_51);
+                const real_t tmp_kernel_op_56 = tmp_kernel_op_41*tmp_kernel_op_44;
+                const real_t tmp_kernel_op_57 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_50 - tmp_kernel_op_55*tmp_kernel_op_56)*(tmp_kernel_op_3*tmp_kernel_op_54 + tmp_kernel_op_53*tmp_kernel_op_56) - (tmp_kernel_op_19*tmp_kernel_op_54 + tmp_kernel_op_42*tmp_kernel_op_55)*(tmp_kernel_op_3*tmp_kernel_op_50 - tmp_kernel_op_45*tmp_kernel_op_53));
+                const real_t tmp_kernel_op_58 = tmp_kernel_op_57*(-tmp_kernel_op_2*tmp_kernel_op_38 - tmp_kernel_op_2*tmp_kernel_op_39);
+                const real_t tmp_kernel_op_62 = p_affine_0_0 + tmp_kernel_op_6*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+                const real_t tmp_kernel_op_63 = (tmp_kernel_op_62*tmp_kernel_op_62);
+                const real_t tmp_kernel_op_64 = p_affine_0_1 + tmp_kernel_op_12*0.66666666666666663 + tmp_kernel_op_14*0.16666666666666666;
+                const real_t tmp_kernel_op_65 = (tmp_kernel_op_64*tmp_kernel_op_64);
+                const real_t tmp_kernel_op_66 = tmp_kernel_op_63 + tmp_kernel_op_65;
+                const real_t tmp_kernel_op_67 = tmp_kernel_op_24*pow(tmp_kernel_op_66, -0.50000000000000000);
+                const real_t tmp_kernel_op_68 = tmp_kernel_op_62*tmp_kernel_op_67;
+                const real_t tmp_kernel_op_69 = pow(tmp_kernel_op_66, -1.5000000000000000);
+                const real_t tmp_kernel_op_70 = radRayVertex + tmp_kernel_op_23*(-tmp_kernel_op_20*(tmp_kernel_op_28 + tmp_kernel_op_64) + tmp_kernel_op_4*(tmp_kernel_op_29 + tmp_kernel_op_62));
+                const real_t tmp_kernel_op_71 = tmp_kernel_op_69*tmp_kernel_op_70*1.0;
+                const real_t tmp_kernel_op_72 = tmp_kernel_op_4*tmp_kernel_op_68 + tmp_kernel_op_65*tmp_kernel_op_71;
+                const real_t tmp_kernel_op_73 = tmp_kernel_op_64*tmp_kernel_op_67;
+                const real_t tmp_kernel_op_74 = tmp_kernel_op_20*tmp_kernel_op_68 + tmp_kernel_op_62*tmp_kernel_op_64*tmp_kernel_op_69*tmp_kernel_op_70*1.0;
+                const real_t tmp_kernel_op_75 = 1.0 / (tmp_kernel_op_72*(-tmp_kernel_op_20*tmp_kernel_op_73 + tmp_kernel_op_63*tmp_kernel_op_69*tmp_kernel_op_70*1.0) + tmp_kernel_op_74*(tmp_kernel_op_4*tmp_kernel_op_73 - tmp_kernel_op_62*tmp_kernel_op_64*tmp_kernel_op_71));
+                const real_t tmp_kernel_op_76 = tmp_kernel_op_72*tmp_kernel_op_75;
+                const real_t tmp_kernel_op_77 = tmp_kernel_op_74*tmp_kernel_op_75;
+                const real_t tmp_kernel_op_78 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_77 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_76;
+                const real_t tmp_kernel_op_79 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_77 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_76;
+                const real_t tmp_kernel_op_80 = tmp_kernel_op_40 + tmp_kernel_op_5*0.66666666666666663 + tmp_kernel_op_7*0.16666666666666666;
+                const real_t tmp_kernel_op_81 = (tmp_kernel_op_80*tmp_kernel_op_80);
+                const real_t tmp_kernel_op_82 = tmp_kernel_op_11*0.66666666666666663 + tmp_kernel_op_13*0.16666666666666666 + tmp_kernel_op_43;
+                const real_t tmp_kernel_op_83 = (tmp_kernel_op_82*tmp_kernel_op_82);
+                const real_t tmp_kernel_op_84 = tmp_kernel_op_81 + tmp_kernel_op_83;
+                const real_t tmp_kernel_op_85 = tmp_kernel_op_48*pow(tmp_kernel_op_84, -0.50000000000000000);
+                const real_t tmp_kernel_op_86 = tmp_kernel_op_80*tmp_kernel_op_85;
+                const real_t tmp_kernel_op_87 = tmp_kernel_op_19*(rayVertex_1 + tmp_kernel_op_82) - tmp_kernel_op_3*(rayVertex_0 + tmp_kernel_op_80);
+                const real_t tmp_kernel_op_88 = pow(tmp_kernel_op_84, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_89 = tmp_kernel_op_88*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_87);
+                const real_t tmp_kernel_op_90 = tmp_kernel_op_82*tmp_kernel_op_85;
+                const real_t tmp_kernel_op_91 = tmp_kernel_op_88*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_87);
+                const real_t tmp_kernel_op_92 = tmp_kernel_op_80*tmp_kernel_op_82;
+                const real_t tmp_kernel_op_93 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_86 - tmp_kernel_op_91*tmp_kernel_op_92)*(tmp_kernel_op_3*tmp_kernel_op_90 + tmp_kernel_op_89*tmp_kernel_op_92) - (tmp_kernel_op_19*tmp_kernel_op_90 + tmp_kernel_op_81*tmp_kernel_op_91)*(tmp_kernel_op_3*tmp_kernel_op_86 - tmp_kernel_op_83*tmp_kernel_op_89));
+                const real_t tmp_kernel_op_94 = tmp_kernel_op_93*(-tmp_kernel_op_61*tmp_kernel_op_78 - tmp_kernel_op_61*tmp_kernel_op_79);
+                const real_t tmp_kernel_op_98 = p_affine_0_0 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+                const real_t tmp_kernel_op_99 = (tmp_kernel_op_98*tmp_kernel_op_98);
+                const real_t tmp_kernel_op_100 = p_affine_0_1 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_14*0.16666666666666666;
+                const real_t tmp_kernel_op_101 = (tmp_kernel_op_100*tmp_kernel_op_100);
+                const real_t tmp_kernel_op_102 = tmp_kernel_op_101 + tmp_kernel_op_99;
+                const real_t tmp_kernel_op_103 = pow(tmp_kernel_op_102, -0.50000000000000000)*tmp_kernel_op_24;
+                const real_t tmp_kernel_op_104 = tmp_kernel_op_103*tmp_kernel_op_98;
+                const real_t tmp_kernel_op_105 = pow(tmp_kernel_op_102, -1.5000000000000000);
+                const real_t tmp_kernel_op_106 = radRayVertex + tmp_kernel_op_23*(-tmp_kernel_op_20*(tmp_kernel_op_100 + tmp_kernel_op_28) + tmp_kernel_op_4*(tmp_kernel_op_29 + tmp_kernel_op_98));
+                const real_t tmp_kernel_op_107 = tmp_kernel_op_105*tmp_kernel_op_106*1.0;
+                const real_t tmp_kernel_op_108 = tmp_kernel_op_101*tmp_kernel_op_107 + tmp_kernel_op_104*tmp_kernel_op_4;
+                const real_t tmp_kernel_op_109 = tmp_kernel_op_100*tmp_kernel_op_103;
+                const real_t tmp_kernel_op_110 = tmp_kernel_op_100*tmp_kernel_op_105*tmp_kernel_op_106*tmp_kernel_op_98*1.0 + tmp_kernel_op_104*tmp_kernel_op_20;
+                const real_t tmp_kernel_op_111 = 1.0 / (tmp_kernel_op_108*(tmp_kernel_op_105*tmp_kernel_op_106*tmp_kernel_op_99*1.0 - tmp_kernel_op_109*tmp_kernel_op_20) + tmp_kernel_op_110*(-tmp_kernel_op_100*tmp_kernel_op_107*tmp_kernel_op_98 + tmp_kernel_op_109*tmp_kernel_op_4));
+                const real_t tmp_kernel_op_112 = tmp_kernel_op_108*tmp_kernel_op_111;
+                const real_t tmp_kernel_op_113 = tmp_kernel_op_110*tmp_kernel_op_111;
+                const real_t tmp_kernel_op_114 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_113 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_112;
+                const real_t tmp_kernel_op_115 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_113 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_112;
+                const real_t tmp_kernel_op_116 = tmp_kernel_op_40 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.16666666666666666;
+                const real_t tmp_kernel_op_117 = (tmp_kernel_op_116*tmp_kernel_op_116);
+                const real_t tmp_kernel_op_118 = tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.16666666666666666 + tmp_kernel_op_43;
+                const real_t tmp_kernel_op_119 = (tmp_kernel_op_118*tmp_kernel_op_118);
+                const real_t tmp_kernel_op_120 = tmp_kernel_op_117 + tmp_kernel_op_119;
+                const real_t tmp_kernel_op_121 = pow(tmp_kernel_op_120, -0.50000000000000000)*tmp_kernel_op_48;
+                const real_t tmp_kernel_op_122 = tmp_kernel_op_116*tmp_kernel_op_121;
+                const real_t tmp_kernel_op_123 = tmp_kernel_op_19*(rayVertex_1 + tmp_kernel_op_118) - tmp_kernel_op_3*(rayVertex_0 + tmp_kernel_op_116);
+                const real_t tmp_kernel_op_124 = pow(tmp_kernel_op_120, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_125 = tmp_kernel_op_124*(radRayVertex + tmp_kernel_op_123*tmp_kernel_op_47);
+                const real_t tmp_kernel_op_126 = tmp_kernel_op_118*tmp_kernel_op_121;
+                const real_t tmp_kernel_op_127 = tmp_kernel_op_124*(radRayVertex + tmp_kernel_op_123*tmp_kernel_op_47);
+                const real_t tmp_kernel_op_128 = tmp_kernel_op_116*tmp_kernel_op_118;
+                const real_t tmp_kernel_op_129 = abs_det_jac_affine_GRAY*0.16666666666666666*abs(-(tmp_kernel_op_117*tmp_kernel_op_127 + tmp_kernel_op_126*tmp_kernel_op_19)*(-tmp_kernel_op_119*tmp_kernel_op_125 + tmp_kernel_op_122*tmp_kernel_op_3) + (tmp_kernel_op_122*tmp_kernel_op_19 - tmp_kernel_op_127*tmp_kernel_op_128)*(tmp_kernel_op_125*tmp_kernel_op_128 + tmp_kernel_op_126*tmp_kernel_op_3));
+                const real_t tmp_kernel_op_130 = tmp_kernel_op_129*(-tmp_kernel_op_114*tmp_kernel_op_97 - tmp_kernel_op_115*tmp_kernel_op_97);
+                const real_t tmp_kernel_op_134 = tmp_kernel_op_38*(tmp_kernel_op_0 - 1.0);
+                const real_t tmp_kernel_op_135 = tmp_kernel_op_134*tmp_kernel_op_57;
+                const real_t tmp_kernel_op_136 = tmp_kernel_op_78*(tmp_kernel_op_59 - 1.0);
+                const real_t tmp_kernel_op_137 = tmp_kernel_op_136*tmp_kernel_op_93;
+                const real_t tmp_kernel_op_138 = tmp_kernel_op_114*(tmp_kernel_op_95 - 1.0);
+                const real_t tmp_kernel_op_139 = tmp_kernel_op_129*tmp_kernel_op_138;
+                const real_t tmp_kernel_op_140 = tmp_kernel_op_131*tmp_kernel_op_57;
+                const real_t tmp_kernel_op_141 = tmp_kernel_op_132*tmp_kernel_op_93;
+                const real_t tmp_kernel_op_142 = tmp_kernel_op_129*tmp_kernel_op_133;
+                const real_t tmp_kernel_op_143 = tmp_kernel_op_39*(tmp_kernel_op_1 - 1.0);
+                const real_t tmp_kernel_op_144 = tmp_kernel_op_143*tmp_kernel_op_57;
+                const real_t tmp_kernel_op_145 = tmp_kernel_op_79*(tmp_kernel_op_60 - 1.0);
+                const real_t tmp_kernel_op_146 = tmp_kernel_op_145*tmp_kernel_op_93;
+                const real_t tmp_kernel_op_147 = tmp_kernel_op_115*(tmp_kernel_op_96 - 1.0);
+                const real_t tmp_kernel_op_148 = tmp_kernel_op_129*tmp_kernel_op_147;
+                const real_t tmp_kernel_op_149 = tmp_kernel_op_0*tmp_kernel_op_39;
+                const real_t tmp_kernel_op_150 = tmp_kernel_op_1*tmp_kernel_op_38;
+                const real_t tmp_kernel_op_151 = tmp_kernel_op_57*(-tmp_kernel_op_149 - tmp_kernel_op_150);
+                const real_t tmp_kernel_op_152 = tmp_kernel_op_59*tmp_kernel_op_79;
+                const real_t tmp_kernel_op_153 = tmp_kernel_op_60*tmp_kernel_op_78;
+                const real_t tmp_kernel_op_154 = tmp_kernel_op_93*(-tmp_kernel_op_152 - tmp_kernel_op_153);
+                const real_t tmp_kernel_op_155 = tmp_kernel_op_115*tmp_kernel_op_95;
+                const real_t tmp_kernel_op_156 = tmp_kernel_op_114*tmp_kernel_op_96;
+                const real_t tmp_kernel_op_157 = tmp_kernel_op_129*(-tmp_kernel_op_155 - tmp_kernel_op_156);
+                const real_t tmp_kernel_op_158 = tmp_kernel_op_57*(tmp_kernel_op_150 - tmp_kernel_op_39*(-tmp_kernel_op_0 - 1.333333333333333));
+                const real_t tmp_kernel_op_159 = tmp_kernel_op_93*(tmp_kernel_op_153 - tmp_kernel_op_79*(-tmp_kernel_op_59 + 2.666666666666667));
+                const real_t tmp_kernel_op_160 = tmp_kernel_op_129*(-tmp_kernel_op_115*(-tmp_kernel_op_95 + 2.666666666666667) + tmp_kernel_op_156);
+                const real_t tmp_kernel_op_161 = tmp_kernel_op_57*(tmp_kernel_op_149 - tmp_kernel_op_38*(-tmp_kernel_op_1 + 2.666666666666667));
+                const real_t tmp_kernel_op_162 = tmp_kernel_op_93*(tmp_kernel_op_152 - tmp_kernel_op_78*(-tmp_kernel_op_60 - 1.333333333333333));
+                const real_t tmp_kernel_op_163 = tmp_kernel_op_129*(-tmp_kernel_op_114*(-tmp_kernel_op_96 + 2.666666666666667) + tmp_kernel_op_155);
+                const real_t elMatVec_0 = src_dof_0*(tmp_kernel_op_130*tmp_kernel_op_133 + tmp_kernel_op_131*tmp_kernel_op_58 + tmp_kernel_op_132*tmp_kernel_op_94) + src_dof_1*(tmp_kernel_op_130*0.16666666666666666 + tmp_kernel_op_58*0.16666666666666666 + tmp_kernel_op_94*0.66666666666666663) + src_dof_2*(tmp_kernel_op_130*0.16666666666666666 + tmp_kernel_op_58*0.66666666666666663 + tmp_kernel_op_94*0.16666666666666666);
+                const real_t elMatVec_1 = src_dof_0*(-tmp_kernel_op_134*tmp_kernel_op_140 - tmp_kernel_op_136*tmp_kernel_op_141 - tmp_kernel_op_138*tmp_kernel_op_142) + src_dof_1*(tmp_kernel_op_135*-0.16666666666666666 + tmp_kernel_op_137*-0.66666666666666663 + tmp_kernel_op_139*-0.16666666666666666) + src_dof_2*(tmp_kernel_op_135*-0.66666666666666663 + tmp_kernel_op_137*-0.16666666666666666 + tmp_kernel_op_139*-0.16666666666666666);
+                const real_t elMatVec_2 = src_dof_0*(-tmp_kernel_op_140*tmp_kernel_op_143 - tmp_kernel_op_141*tmp_kernel_op_145 - tmp_kernel_op_142*tmp_kernel_op_147) + src_dof_1*(tmp_kernel_op_144*-0.16666666666666666 + tmp_kernel_op_146*-0.66666666666666663 + tmp_kernel_op_148*-0.16666666666666666) + src_dof_2*(tmp_kernel_op_144*-0.66666666666666663 + tmp_kernel_op_146*-0.16666666666666666 + tmp_kernel_op_148*-0.16666666666666666);
+                const real_t elMatVec_3 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_151 + tmp_kernel_op_132*tmp_kernel_op_154 + tmp_kernel_op_133*tmp_kernel_op_157) + src_dof_1*(tmp_kernel_op_151*0.16666666666666666 + tmp_kernel_op_154*0.66666666666666663 + tmp_kernel_op_157*0.16666666666666666) + src_dof_2*(tmp_kernel_op_151*0.66666666666666663 + tmp_kernel_op_154*0.16666666666666666 + tmp_kernel_op_157*0.16666666666666666);
+                const real_t elMatVec_4 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_158 + tmp_kernel_op_132*tmp_kernel_op_159 + tmp_kernel_op_133*tmp_kernel_op_160) + src_dof_1*(tmp_kernel_op_158*0.16666666666666666 + tmp_kernel_op_159*0.66666666666666663 + tmp_kernel_op_160*0.16666666666666666) + src_dof_2*(tmp_kernel_op_158*0.66666666666666663 + tmp_kernel_op_159*0.16666666666666666 + tmp_kernel_op_160*0.16666666666666666);
+                const real_t elMatVec_5 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_161 + tmp_kernel_op_132*tmp_kernel_op_162 + tmp_kernel_op_133*tmp_kernel_op_163) + src_dof_1*(tmp_kernel_op_161*0.16666666666666666 + tmp_kernel_op_162*0.66666666666666663 + tmp_kernel_op_163*0.16666666666666666) + src_dof_2*(tmp_kernel_op_161*0.66666666666666663 + tmp_kernel_op_162*0.16666666666666666 + tmp_kernel_op_163*0.16666666666666666);
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // BLUE-face affine Jacobian setup, computed once before the BLUE loop nest; this is the reciprocal of micro_edges_per_macro_edge_float (call it h)
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); // v0 + h*(v1 - v0), comp0
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); // v0 + h*(v1 - v0), comp1
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); // h*(v2 - v0), comp0
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); // h*(v2 - v0), comp1
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; // point 0 = v0 + h*(v1 - v0)
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; // point 1 = v0 + h*(v2 - v0)
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; // point 2 = v0 + h*(v1 - v0) + h*(v2 - v0)
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; // 2x2 matrix with columns (point 1 - point 0) and (point 2 - point 0)
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; // determinant of the 2x2 matrix above
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); // reciprocal of the determinant; no guard against a zero determinant here
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; // inverse of the 2x2 matrix: adjugate entries scaled by 1/det
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); // absolute value of the determinant; NOTE(review): unqualified abs - confirm it resolves to a floating-point overload for real_t and not the integer abs
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d tmp_kernel_op_5 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_6 = _mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_7 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0);
+                const __m256d tmp_kernel_op_8 = _mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_9 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_6,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_0);
+                const __m256d tmp_kernel_op_10 = _mm256_mul_pd(tmp_kernel_op_9,tmp_kernel_op_9);
+                const __m256d tmp_kernel_op_11 = _mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_12 = _mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_13 = _mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1);
+                const __m256d tmp_kernel_op_14 = _mm256_mul_pd(tmp_kernel_op_13,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_15 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_12,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_14,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_1);
+                const __m256d tmp_kernel_op_16 = _mm256_mul_pd(tmp_kernel_op_15,tmp_kernel_op_15);
+                const __m256d tmp_kernel_op_17 = _mm256_add_pd(tmp_kernel_op_10,tmp_kernel_op_16);
+                const __m256d tmp_kernel_op_25 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_17)),_mm256_set_pd(tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24));
+                const __m256d tmp_kernel_op_26 = _mm256_mul_pd(tmp_kernel_op_25,tmp_kernel_op_9);
+                const __m256d tmp_kernel_op_27 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_17),_mm256_mul_pd(tmp_kernel_op_17,tmp_kernel_op_17));
+                const __m256d tmp_kernel_op_30 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29),tmp_kernel_op_9),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28),tmp_kernel_op_15),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20))),_mm256_set_pd(tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_31 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_27,tmp_kernel_op_30),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_32 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_26,_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(tmp_kernel_op_16,tmp_kernel_op_31));
+                const __m256d tmp_kernel_op_33 = _mm256_mul_pd(tmp_kernel_op_15,tmp_kernel_op_25);
+                const __m256d tmp_kernel_op_34 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_26,_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_15,tmp_kernel_op_27),tmp_kernel_op_30),tmp_kernel_op_9),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_35 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_34,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_33,_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_15,tmp_kernel_op_31),tmp_kernel_op_9),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(tmp_kernel_op_32,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_10,tmp_kernel_op_27),tmp_kernel_op_30),_mm256_set_pd(1.0,1.0,1.0,1.0))))));
+                const __m256d tmp_kernel_op_36 = _mm256_mul_pd(tmp_kernel_op_32,tmp_kernel_op_35);
+                const __m256d tmp_kernel_op_37 = _mm256_mul_pd(tmp_kernel_op_34,tmp_kernel_op_35);
+                const __m256d tmp_kernel_op_38 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_37,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_36,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)));
+                const __m256d tmp_kernel_op_39 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_37,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_36,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                const __m256d tmp_kernel_op_40 = _mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_41 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_40);
+                const __m256d tmp_kernel_op_42 = _mm256_mul_pd(tmp_kernel_op_41,tmp_kernel_op_41);
+                const __m256d tmp_kernel_op_43 = _mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0));
+                const __m256d tmp_kernel_op_44 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_13,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_43);
+                const __m256d tmp_kernel_op_45 = _mm256_mul_pd(tmp_kernel_op_44,tmp_kernel_op_44);
+                const __m256d tmp_kernel_op_46 = _mm256_add_pd(tmp_kernel_op_42,tmp_kernel_op_45);
+                const __m256d tmp_kernel_op_49 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_46)),_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48));
+                const __m256d tmp_kernel_op_50 = _mm256_mul_pd(tmp_kernel_op_41,tmp_kernel_op_49);
+                const __m256d tmp_kernel_op_51 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_44),_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_41),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)));
+                const __m256d tmp_kernel_op_52 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_46),_mm256_mul_pd(tmp_kernel_op_46,tmp_kernel_op_46)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_53 = _mm256_mul_pd(tmp_kernel_op_52,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_51,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_54 = _mm256_mul_pd(tmp_kernel_op_44,tmp_kernel_op_49);
+                const __m256d tmp_kernel_op_55 = _mm256_mul_pd(tmp_kernel_op_52,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_51,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_56 = _mm256_mul_pd(tmp_kernel_op_41,tmp_kernel_op_44);
+                const __m256d tmp_kernel_op_57 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_50,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_55,tmp_kernel_op_56),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_54,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(tmp_kernel_op_53,tmp_kernel_op_56))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_54,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(tmp_kernel_op_42,tmp_kernel_op_55)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_50,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_45,tmp_kernel_op_53),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_58 = _mm256_mul_pd(tmp_kernel_op_57,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_2,tmp_kernel_op_2,tmp_kernel_op_2,tmp_kernel_op_2)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_39,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_2,tmp_kernel_op_2,tmp_kernel_op_2,tmp_kernel_op_2))));
+                const __m256d tmp_kernel_op_62 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_6,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_0);
+                const __m256d tmp_kernel_op_63 = _mm256_mul_pd(tmp_kernel_op_62,tmp_kernel_op_62);
+                const __m256d tmp_kernel_op_64 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_14,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_12,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),p_affine_0_1);
+                const __m256d tmp_kernel_op_65 = _mm256_mul_pd(tmp_kernel_op_64,tmp_kernel_op_64);
+                const __m256d tmp_kernel_op_66 = _mm256_add_pd(tmp_kernel_op_63,tmp_kernel_op_65);
+                const __m256d tmp_kernel_op_67 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_66)),_mm256_set_pd(tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24));
+                const __m256d tmp_kernel_op_68 = _mm256_mul_pd(tmp_kernel_op_62,tmp_kernel_op_67);
+                const __m256d tmp_kernel_op_69 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_66),_mm256_mul_pd(tmp_kernel_op_66,tmp_kernel_op_66));
+                const __m256d tmp_kernel_op_70 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29),tmp_kernel_op_62),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28),tmp_kernel_op_64),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20))),_mm256_set_pd(tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_71 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_69,tmp_kernel_op_70),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_72 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_68,_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(tmp_kernel_op_65,tmp_kernel_op_71));
+                const __m256d tmp_kernel_op_73 = _mm256_mul_pd(tmp_kernel_op_64,tmp_kernel_op_67);
+                const __m256d tmp_kernel_op_74 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_68,_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_62,tmp_kernel_op_64),tmp_kernel_op_69),tmp_kernel_op_70),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_75 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_74,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_73,_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_62,tmp_kernel_op_64),tmp_kernel_op_71),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(tmp_kernel_op_72,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_73,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_63,tmp_kernel_op_69),tmp_kernel_op_70),_mm256_set_pd(1.0,1.0,1.0,1.0))))));
+                const __m256d tmp_kernel_op_76 = _mm256_mul_pd(tmp_kernel_op_72,tmp_kernel_op_75);
+                const __m256d tmp_kernel_op_77 = _mm256_mul_pd(tmp_kernel_op_74,tmp_kernel_op_75);
+                const __m256d tmp_kernel_op_78 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_77,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_76,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)));
+                const __m256d tmp_kernel_op_79 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_77,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_76,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                const __m256d tmp_kernel_op_80 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_40);
+                const __m256d tmp_kernel_op_81 = _mm256_mul_pd(tmp_kernel_op_80,tmp_kernel_op_80);
+                const __m256d tmp_kernel_op_82 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_13,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))),tmp_kernel_op_43);
+                const __m256d tmp_kernel_op_83 = _mm256_mul_pd(tmp_kernel_op_82,tmp_kernel_op_82);
+                const __m256d tmp_kernel_op_84 = _mm256_add_pd(tmp_kernel_op_81,tmp_kernel_op_83);
+                const __m256d tmp_kernel_op_85 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_84)),_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48));
+                const __m256d tmp_kernel_op_86 = _mm256_mul_pd(tmp_kernel_op_80,tmp_kernel_op_85);
+                const __m256d tmp_kernel_op_87 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_82),_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_80),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)));
+                const __m256d tmp_kernel_op_88 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_84),_mm256_mul_pd(tmp_kernel_op_84,tmp_kernel_op_84)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_89 = _mm256_mul_pd(tmp_kernel_op_88,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_87,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_90 = _mm256_mul_pd(tmp_kernel_op_82,tmp_kernel_op_85);
+                const __m256d tmp_kernel_op_91 = _mm256_mul_pd(tmp_kernel_op_88,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_87,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_92 = _mm256_mul_pd(tmp_kernel_op_80,tmp_kernel_op_82);
+                const __m256d tmp_kernel_op_93 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_86,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_91,tmp_kernel_op_92),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_90,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(tmp_kernel_op_89,tmp_kernel_op_92))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_90,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(tmp_kernel_op_81,tmp_kernel_op_91)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_86,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_83,tmp_kernel_op_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_94 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_78,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_61,tmp_kernel_op_61,tmp_kernel_op_61,tmp_kernel_op_61)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_79,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_61,tmp_kernel_op_61,tmp_kernel_op_61,tmp_kernel_op_61))));
+                const __m256d tmp_kernel_op_98 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_6,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_8,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),p_affine_0_0);
+                const __m256d tmp_kernel_op_99 = _mm256_mul_pd(tmp_kernel_op_98,tmp_kernel_op_98);
+                const __m256d tmp_kernel_op_100 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_12,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_14,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),p_affine_0_1);
+                const __m256d tmp_kernel_op_101 = _mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_100);
+                const __m256d tmp_kernel_op_102 = _mm256_add_pd(tmp_kernel_op_101,tmp_kernel_op_99);
+                const __m256d tmp_kernel_op_103 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_102)),_mm256_set_pd(tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24,tmp_kernel_op_24));
+                const __m256d tmp_kernel_op_104 = _mm256_mul_pd(tmp_kernel_op_103,tmp_kernel_op_98);
+                const __m256d tmp_kernel_op_105 = _mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_102),_mm256_mul_pd(tmp_kernel_op_102,tmp_kernel_op_102));
+                const __m256d tmp_kernel_op_106 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29,tmp_kernel_op_29),tmp_kernel_op_98),_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28,tmp_kernel_op_28),tmp_kernel_op_100),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20))),_mm256_set_pd(tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23,tmp_kernel_op_23)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex));
+                const __m256d tmp_kernel_op_107 = _mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_106),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_108 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_104,_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(tmp_kernel_op_101,tmp_kernel_op_107));
+                const __m256d tmp_kernel_op_109 = _mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_103);
+                const __m256d tmp_kernel_op_110 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_104,_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_105),tmp_kernel_op_106),tmp_kernel_op_98),_mm256_set_pd(1.0,1.0,1.0,1.0)));
+                const __m256d tmp_kernel_op_111 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_110,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_109,_mm256_set_pd(tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4,tmp_kernel_op_4)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_100,tmp_kernel_op_107),tmp_kernel_op_98),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_mul_pd(tmp_kernel_op_108,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_109,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20,tmp_kernel_op_20)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_105,tmp_kernel_op_106),tmp_kernel_op_99),_mm256_set_pd(1.0,1.0,1.0,1.0))))));
+                const __m256d tmp_kernel_op_112 = _mm256_mul_pd(tmp_kernel_op_108,tmp_kernel_op_111);
+                const __m256d tmp_kernel_op_113 = _mm256_mul_pd(tmp_kernel_op_110,tmp_kernel_op_111);
+                const __m256d tmp_kernel_op_114 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_113,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_112,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)));
+                const __m256d tmp_kernel_op_115 = _mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_113,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(tmp_kernel_op_112,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)));
+                const __m256d tmp_kernel_op_116 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_5,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_7,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),tmp_kernel_op_40);
+                const __m256d tmp_kernel_op_117 = _mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_116);
+                const __m256d tmp_kernel_op_118 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_11,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_13,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),tmp_kernel_op_43);
+                const __m256d tmp_kernel_op_119 = _mm256_mul_pd(tmp_kernel_op_118,tmp_kernel_op_118);
+                const __m256d tmp_kernel_op_120 = _mm256_add_pd(tmp_kernel_op_117,tmp_kernel_op_119);
+                const __m256d tmp_kernel_op_121 = _mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_kernel_op_120)),_mm256_set_pd(tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48,tmp_kernel_op_48));
+                const __m256d tmp_kernel_op_122 = _mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_121);
+                const __m256d tmp_kernel_op_123 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_kernel_op_118),_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_kernel_op_116),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)));
+                const __m256d tmp_kernel_op_124 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_kernel_op_120),_mm256_mul_pd(tmp_kernel_op_120,tmp_kernel_op_120)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                const __m256d tmp_kernel_op_125 = _mm256_mul_pd(tmp_kernel_op_124,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_123,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_126 = _mm256_mul_pd(tmp_kernel_op_118,tmp_kernel_op_121);
+                const __m256d tmp_kernel_op_127 = _mm256_mul_pd(tmp_kernel_op_124,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_123,_mm256_set_pd(tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47,tmp_kernel_op_47)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                const __m256d tmp_kernel_op_128 = _mm256_mul_pd(tmp_kernel_op_116,tmp_kernel_op_118);
+                const __m256d tmp_kernel_op_129 = _mm256_mul_pd(_mm256_mul_pd(_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_122,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_127,tmp_kernel_op_128),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_126,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(tmp_kernel_op_125,tmp_kernel_op_128))),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_126,_mm256_set_pd(tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19,tmp_kernel_op_19)),_mm256_mul_pd(tmp_kernel_op_117,tmp_kernel_op_127)),_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_122,_mm256_set_pd(tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3,tmp_kernel_op_3)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_119,tmp_kernel_op_125),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))));
+                const __m256d tmp_kernel_op_130 = _mm256_mul_pd(tmp_kernel_op_129,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_114,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_97,tmp_kernel_op_97,tmp_kernel_op_97,tmp_kernel_op_97)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_115,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_kernel_op_97,tmp_kernel_op_97,tmp_kernel_op_97,tmp_kernel_op_97))));
+                const __m256d tmp_kernel_op_134 = _mm256_mul_pd(tmp_kernel_op_38,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)));
+                const __m256d tmp_kernel_op_135 = _mm256_mul_pd(tmp_kernel_op_134,tmp_kernel_op_57);
+                const __m256d tmp_kernel_op_136 = _mm256_mul_pd(tmp_kernel_op_78,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59)));
+                const __m256d tmp_kernel_op_137 = _mm256_mul_pd(tmp_kernel_op_136,tmp_kernel_op_93);
+                const __m256d tmp_kernel_op_138 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95)));
+                const __m256d tmp_kernel_op_139 = _mm256_mul_pd(tmp_kernel_op_129,tmp_kernel_op_138);
+                const __m256d tmp_kernel_op_140 = _mm256_mul_pd(tmp_kernel_op_57,_mm256_set_pd(tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131));
+                const __m256d tmp_kernel_op_141 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132));
+                const __m256d tmp_kernel_op_142 = _mm256_mul_pd(tmp_kernel_op_129,_mm256_set_pd(tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133));
+                const __m256d tmp_kernel_op_143 = _mm256_mul_pd(tmp_kernel_op_39,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)));
+                const __m256d tmp_kernel_op_144 = _mm256_mul_pd(tmp_kernel_op_143,tmp_kernel_op_57);
+                const __m256d tmp_kernel_op_145 = _mm256_mul_pd(tmp_kernel_op_79,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60)));
+                const __m256d tmp_kernel_op_146 = _mm256_mul_pd(tmp_kernel_op_145,tmp_kernel_op_93);
+                const __m256d tmp_kernel_op_147 = _mm256_mul_pd(tmp_kernel_op_115,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96)));
+                const __m256d tmp_kernel_op_148 = _mm256_mul_pd(tmp_kernel_op_129,tmp_kernel_op_147);
+                const __m256d tmp_kernel_op_149 = _mm256_mul_pd(tmp_kernel_op_39,_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0));
+                const __m256d tmp_kernel_op_150 = _mm256_mul_pd(tmp_kernel_op_38,_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1));
+                const __m256d tmp_kernel_op_151 = _mm256_mul_pd(tmp_kernel_op_57,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_149,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_150,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_152 = _mm256_mul_pd(tmp_kernel_op_79,_mm256_set_pd(tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59));
+                const __m256d tmp_kernel_op_153 = _mm256_mul_pd(tmp_kernel_op_78,_mm256_set_pd(tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60));
+                const __m256d tmp_kernel_op_154 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_152,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_153,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_155 = _mm256_mul_pd(tmp_kernel_op_115,_mm256_set_pd(tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95));
+                const __m256d tmp_kernel_op_156 = _mm256_mul_pd(tmp_kernel_op_114,_mm256_set_pd(tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96));
+                const __m256d tmp_kernel_op_157 = _mm256_mul_pd(tmp_kernel_op_129,_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_155,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_kernel_op_156,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))));
+                const __m256d tmp_kernel_op_158 = _mm256_mul_pd(tmp_kernel_op_57,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_39,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0,tmp_kernel_op_0)),_mm256_set_pd(-1.333333333333333,-1.333333333333333,-1.333333333333333,-1.333333333333333))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_150));
+                const __m256d tmp_kernel_op_159 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_79,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59,tmp_kernel_op_59)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_153));
+                const __m256d tmp_kernel_op_160 = _mm256_mul_pd(tmp_kernel_op_129,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_115,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95,tmp_kernel_op_95)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_156));
+                const __m256d tmp_kernel_op_161 = _mm256_mul_pd(tmp_kernel_op_57,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_38,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1,tmp_kernel_op_1)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_149));
+                const __m256d tmp_kernel_op_162 = _mm256_mul_pd(tmp_kernel_op_93,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_78,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60,tmp_kernel_op_60)),_mm256_set_pd(-1.333333333333333,-1.333333333333333,-1.333333333333333,-1.333333333333333))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_152));
+                const __m256d tmp_kernel_op_163 = _mm256_mul_pd(tmp_kernel_op_129,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_114,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96,tmp_kernel_op_96)),_mm256_set_pd(2.666666666666667,2.666666666666667,2.666666666666667,2.666666666666667))),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_kernel_op_155));
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_58,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_94,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_130,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_94,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_58,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_58,_mm256_set_pd(tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131)),_mm256_mul_pd(tmp_kernel_op_94,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132))),_mm256_mul_pd(tmp_kernel_op_130,_mm256_set_pd(tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133)))));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_135,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_139,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_137,_mm256_set_pd(-0.66666666666666663,-0.66666666666666663,-0.66666666666666663,-0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_137,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_139,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_135,_mm256_set_pd(-0.66666666666666663,-0.66666666666666663,-0.66666666666666663,-0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_134,tmp_kernel_op_140),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_136,tmp_kernel_op_141),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_138,tmp_kernel_op_142),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_144,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_148,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_146,_mm256_set_pd(-0.66666666666666663,-0.66666666666666663,-0.66666666666666663,-0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_146,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_148,_mm256_set_pd(-0.16666666666666666,-0.16666666666666666,-0.16666666666666666,-0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_144,_mm256_set_pd(-0.66666666666666663,-0.66666666666666663,-0.66666666666666663,-0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_140,tmp_kernel_op_143),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_141,tmp_kernel_op_145),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(tmp_kernel_op_142,tmp_kernel_op_147),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))));
+                const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_151,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_157,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_154,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_154,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_157,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_151,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_151,_mm256_set_pd(tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131)),_mm256_mul_pd(tmp_kernel_op_154,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132))),_mm256_mul_pd(tmp_kernel_op_157,_mm256_set_pd(tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133)))));
+                const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_158,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_160,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_159,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_159,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_160,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_158,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_158,_mm256_set_pd(tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131)),_mm256_mul_pd(tmp_kernel_op_159,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132))),_mm256_mul_pd(tmp_kernel_op_160,_mm256_set_pd(tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133)))));
+                const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(src_dof_1,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_161,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_163,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_162,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663)))),_mm256_mul_pd(src_dof_2,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_162,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666)),_mm256_mul_pd(tmp_kernel_op_163,_mm256_set_pd(0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666))),_mm256_mul_pd(tmp_kernel_op_161,_mm256_set_pd(0.66666666666666663,0.66666666666666663,0.66666666666666663,0.66666666666666663))))),_mm256_mul_pd(src_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_kernel_op_161,_mm256_set_pd(tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131,tmp_kernel_op_131)),_mm256_mul_pd(tmp_kernel_op_162,_mm256_set_pd(tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132,tmp_kernel_op_132))),_mm256_mul_pd(tmp_kernel_op_163,_mm256_set_pd(tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133,tmp_kernel_op_133)))));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_1 = _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_2 = _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t tmp_kernel_op_5 = p_affine_0_0 - p_affine_1_0;
+                const real_t tmp_kernel_op_6 = -tmp_kernel_op_5;
+                const real_t tmp_kernel_op_7 = p_affine_0_0 - p_affine_2_0;
+                const real_t tmp_kernel_op_8 = -tmp_kernel_op_7;
+                const real_t tmp_kernel_op_9 = p_affine_0_0 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+                const real_t tmp_kernel_op_10 = (tmp_kernel_op_9*tmp_kernel_op_9);
+                const real_t tmp_kernel_op_11 = p_affine_0_1 - p_affine_1_1;
+                const real_t tmp_kernel_op_12 = -tmp_kernel_op_11;
+                const real_t tmp_kernel_op_13 = p_affine_0_1 - p_affine_2_1;
+                const real_t tmp_kernel_op_14 = -tmp_kernel_op_13;
+                const real_t tmp_kernel_op_15 = p_affine_0_1 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_14*0.66666666666666663;
+                const real_t tmp_kernel_op_16 = (tmp_kernel_op_15*tmp_kernel_op_15);
+                const real_t tmp_kernel_op_17 = tmp_kernel_op_10 + tmp_kernel_op_16;
+                const real_t tmp_kernel_op_25 = pow(tmp_kernel_op_17, -0.50000000000000000)*tmp_kernel_op_24;
+                const real_t tmp_kernel_op_26 = tmp_kernel_op_25*tmp_kernel_op_9;
+                const real_t tmp_kernel_op_27 = pow(tmp_kernel_op_17, -1.5000000000000000);
+                const real_t tmp_kernel_op_30 = radRayVertex + tmp_kernel_op_23*(-tmp_kernel_op_20*(tmp_kernel_op_15 + tmp_kernel_op_28) + tmp_kernel_op_4*(tmp_kernel_op_29 + tmp_kernel_op_9));
+                const real_t tmp_kernel_op_31 = tmp_kernel_op_27*tmp_kernel_op_30*1.0;
+                const real_t tmp_kernel_op_32 = tmp_kernel_op_16*tmp_kernel_op_31 + tmp_kernel_op_26*tmp_kernel_op_4;
+                const real_t tmp_kernel_op_33 = tmp_kernel_op_15*tmp_kernel_op_25;
+                const real_t tmp_kernel_op_34 = tmp_kernel_op_15*tmp_kernel_op_27*tmp_kernel_op_30*tmp_kernel_op_9*1.0 + tmp_kernel_op_20*tmp_kernel_op_26;
+                const real_t tmp_kernel_op_35 = 1.0 / (tmp_kernel_op_32*(tmp_kernel_op_10*tmp_kernel_op_27*tmp_kernel_op_30*1.0 - tmp_kernel_op_20*tmp_kernel_op_33) + tmp_kernel_op_34*(-tmp_kernel_op_15*tmp_kernel_op_31*tmp_kernel_op_9 + tmp_kernel_op_33*tmp_kernel_op_4));
+                const real_t tmp_kernel_op_36 = tmp_kernel_op_32*tmp_kernel_op_35;
+                const real_t tmp_kernel_op_37 = tmp_kernel_op_34*tmp_kernel_op_35;
+                const real_t tmp_kernel_op_38 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_37 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_36;
+                const real_t tmp_kernel_op_39 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_37 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_36;
+                const real_t tmp_kernel_op_40 = -p_affine_0_0;
+                const real_t tmp_kernel_op_41 = tmp_kernel_op_40 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.66666666666666663;
+                const real_t tmp_kernel_op_42 = (tmp_kernel_op_41*tmp_kernel_op_41);
+                const real_t tmp_kernel_op_43 = -p_affine_0_1;
+                const real_t tmp_kernel_op_44 = tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.66666666666666663 + tmp_kernel_op_43;
+                const real_t tmp_kernel_op_45 = (tmp_kernel_op_44*tmp_kernel_op_44);
+                const real_t tmp_kernel_op_46 = tmp_kernel_op_42 + tmp_kernel_op_45;
+                const real_t tmp_kernel_op_49 = pow(tmp_kernel_op_46, -0.50000000000000000)*tmp_kernel_op_48;
+                const real_t tmp_kernel_op_50 = tmp_kernel_op_41*tmp_kernel_op_49;
+                const real_t tmp_kernel_op_51 = tmp_kernel_op_19*(rayVertex_1 + tmp_kernel_op_44) - tmp_kernel_op_3*(rayVertex_0 + tmp_kernel_op_41);
+                const real_t tmp_kernel_op_52 = pow(tmp_kernel_op_46, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_53 = tmp_kernel_op_52*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_51);
+                const real_t tmp_kernel_op_54 = tmp_kernel_op_44*tmp_kernel_op_49;
+                const real_t tmp_kernel_op_55 = tmp_kernel_op_52*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_51);
+                const real_t tmp_kernel_op_56 = tmp_kernel_op_41*tmp_kernel_op_44;
+                const real_t tmp_kernel_op_57 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_50 - tmp_kernel_op_55*tmp_kernel_op_56)*(tmp_kernel_op_3*tmp_kernel_op_54 + tmp_kernel_op_53*tmp_kernel_op_56) - (tmp_kernel_op_19*tmp_kernel_op_54 + tmp_kernel_op_42*tmp_kernel_op_55)*(tmp_kernel_op_3*tmp_kernel_op_50 - tmp_kernel_op_45*tmp_kernel_op_53));
+                const real_t tmp_kernel_op_58 = tmp_kernel_op_57*(-tmp_kernel_op_2*tmp_kernel_op_38 - tmp_kernel_op_2*tmp_kernel_op_39);
+                const real_t tmp_kernel_op_62 = p_affine_0_0 + tmp_kernel_op_6*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+                const real_t tmp_kernel_op_63 = (tmp_kernel_op_62*tmp_kernel_op_62);
+                const real_t tmp_kernel_op_64 = p_affine_0_1 + tmp_kernel_op_12*0.66666666666666663 + tmp_kernel_op_14*0.16666666666666666;
+                const real_t tmp_kernel_op_65 = (tmp_kernel_op_64*tmp_kernel_op_64);
+                const real_t tmp_kernel_op_66 = tmp_kernel_op_63 + tmp_kernel_op_65;
+                const real_t tmp_kernel_op_67 = tmp_kernel_op_24*pow(tmp_kernel_op_66, -0.50000000000000000);
+                const real_t tmp_kernel_op_68 = tmp_kernel_op_62*tmp_kernel_op_67;
+                const real_t tmp_kernel_op_69 = pow(tmp_kernel_op_66, -1.5000000000000000);
+                const real_t tmp_kernel_op_70 = radRayVertex + tmp_kernel_op_23*(-tmp_kernel_op_20*(tmp_kernel_op_28 + tmp_kernel_op_64) + tmp_kernel_op_4*(tmp_kernel_op_29 + tmp_kernel_op_62));
+                const real_t tmp_kernel_op_71 = tmp_kernel_op_69*tmp_kernel_op_70*1.0;
+                const real_t tmp_kernel_op_72 = tmp_kernel_op_4*tmp_kernel_op_68 + tmp_kernel_op_65*tmp_kernel_op_71;
+                const real_t tmp_kernel_op_73 = tmp_kernel_op_64*tmp_kernel_op_67;
+                const real_t tmp_kernel_op_74 = tmp_kernel_op_20*tmp_kernel_op_68 + tmp_kernel_op_62*tmp_kernel_op_64*tmp_kernel_op_69*tmp_kernel_op_70*1.0;
+                const real_t tmp_kernel_op_75 = 1.0 / (tmp_kernel_op_72*(-tmp_kernel_op_20*tmp_kernel_op_73 + tmp_kernel_op_63*tmp_kernel_op_69*tmp_kernel_op_70*1.0) + tmp_kernel_op_74*(tmp_kernel_op_4*tmp_kernel_op_73 - tmp_kernel_op_62*tmp_kernel_op_64*tmp_kernel_op_71));
+                const real_t tmp_kernel_op_76 = tmp_kernel_op_72*tmp_kernel_op_75;
+                const real_t tmp_kernel_op_77 = tmp_kernel_op_74*tmp_kernel_op_75;
+                const real_t tmp_kernel_op_78 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_77 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_76;
+                const real_t tmp_kernel_op_79 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_77 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_76;
+                const real_t tmp_kernel_op_80 = tmp_kernel_op_40 + tmp_kernel_op_5*0.66666666666666663 + tmp_kernel_op_7*0.16666666666666666;
+                const real_t tmp_kernel_op_81 = (tmp_kernel_op_80*tmp_kernel_op_80);
+                const real_t tmp_kernel_op_82 = tmp_kernel_op_11*0.66666666666666663 + tmp_kernel_op_13*0.16666666666666666 + tmp_kernel_op_43;
+                const real_t tmp_kernel_op_83 = (tmp_kernel_op_82*tmp_kernel_op_82);
+                const real_t tmp_kernel_op_84 = tmp_kernel_op_81 + tmp_kernel_op_83;
+                const real_t tmp_kernel_op_85 = tmp_kernel_op_48*pow(tmp_kernel_op_84, -0.50000000000000000);
+                const real_t tmp_kernel_op_86 = tmp_kernel_op_80*tmp_kernel_op_85;
+                const real_t tmp_kernel_op_87 = tmp_kernel_op_19*(rayVertex_1 + tmp_kernel_op_82) - tmp_kernel_op_3*(rayVertex_0 + tmp_kernel_op_80);
+                const real_t tmp_kernel_op_88 = pow(tmp_kernel_op_84, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_89 = tmp_kernel_op_88*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_87);
+                const real_t tmp_kernel_op_90 = tmp_kernel_op_82*tmp_kernel_op_85;
+                const real_t tmp_kernel_op_91 = tmp_kernel_op_88*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_87);
+                const real_t tmp_kernel_op_92 = tmp_kernel_op_80*tmp_kernel_op_82;
+                const real_t tmp_kernel_op_93 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_86 - tmp_kernel_op_91*tmp_kernel_op_92)*(tmp_kernel_op_3*tmp_kernel_op_90 + tmp_kernel_op_89*tmp_kernel_op_92) - (tmp_kernel_op_19*tmp_kernel_op_90 + tmp_kernel_op_81*tmp_kernel_op_91)*(tmp_kernel_op_3*tmp_kernel_op_86 - tmp_kernel_op_83*tmp_kernel_op_89));
+                const real_t tmp_kernel_op_94 = tmp_kernel_op_93*(-tmp_kernel_op_61*tmp_kernel_op_78 - tmp_kernel_op_61*tmp_kernel_op_79);
+                const real_t tmp_kernel_op_98 = p_affine_0_0 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+                const real_t tmp_kernel_op_99 = (tmp_kernel_op_98*tmp_kernel_op_98);
+                const real_t tmp_kernel_op_100 = p_affine_0_1 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_14*0.16666666666666666;
+                const real_t tmp_kernel_op_101 = (tmp_kernel_op_100*tmp_kernel_op_100);
+                const real_t tmp_kernel_op_102 = tmp_kernel_op_101 + tmp_kernel_op_99;
+                const real_t tmp_kernel_op_103 = pow(tmp_kernel_op_102, -0.50000000000000000)*tmp_kernel_op_24;
+                const real_t tmp_kernel_op_104 = tmp_kernel_op_103*tmp_kernel_op_98;
+                const real_t tmp_kernel_op_105 = pow(tmp_kernel_op_102, -1.5000000000000000);
+                const real_t tmp_kernel_op_106 = radRayVertex + tmp_kernel_op_23*(-tmp_kernel_op_20*(tmp_kernel_op_100 + tmp_kernel_op_28) + tmp_kernel_op_4*(tmp_kernel_op_29 + tmp_kernel_op_98));
+                const real_t tmp_kernel_op_107 = tmp_kernel_op_105*tmp_kernel_op_106*1.0;
+                const real_t tmp_kernel_op_108 = tmp_kernel_op_101*tmp_kernel_op_107 + tmp_kernel_op_104*tmp_kernel_op_4;
+                const real_t tmp_kernel_op_109 = tmp_kernel_op_100*tmp_kernel_op_103;
+                const real_t tmp_kernel_op_110 = tmp_kernel_op_100*tmp_kernel_op_105*tmp_kernel_op_106*tmp_kernel_op_98*1.0 + tmp_kernel_op_104*tmp_kernel_op_20;
+                const real_t tmp_kernel_op_111 = 1.0 / (tmp_kernel_op_108*(tmp_kernel_op_105*tmp_kernel_op_106*tmp_kernel_op_99*1.0 - tmp_kernel_op_109*tmp_kernel_op_20) + tmp_kernel_op_110*(-tmp_kernel_op_100*tmp_kernel_op_107*tmp_kernel_op_98 + tmp_kernel_op_109*tmp_kernel_op_4));
+                const real_t tmp_kernel_op_112 = tmp_kernel_op_108*tmp_kernel_op_111;
+                const real_t tmp_kernel_op_113 = tmp_kernel_op_110*tmp_kernel_op_111;
+                const real_t tmp_kernel_op_114 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_113 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_112;
+                const real_t tmp_kernel_op_115 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_113 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_112;
+                const real_t tmp_kernel_op_116 = tmp_kernel_op_40 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.16666666666666666;
+                const real_t tmp_kernel_op_117 = (tmp_kernel_op_116*tmp_kernel_op_116);
+                const real_t tmp_kernel_op_118 = tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.16666666666666666 + tmp_kernel_op_43;
+                const real_t tmp_kernel_op_119 = (tmp_kernel_op_118*tmp_kernel_op_118);
+                const real_t tmp_kernel_op_120 = tmp_kernel_op_117 + tmp_kernel_op_119;
+                const real_t tmp_kernel_op_121 = pow(tmp_kernel_op_120, -0.50000000000000000)*tmp_kernel_op_48;
+                const real_t tmp_kernel_op_122 = tmp_kernel_op_116*tmp_kernel_op_121;
+                const real_t tmp_kernel_op_123 = tmp_kernel_op_19*(rayVertex_1 + tmp_kernel_op_118) - tmp_kernel_op_3*(rayVertex_0 + tmp_kernel_op_116);
+                const real_t tmp_kernel_op_124 = pow(tmp_kernel_op_120, -1.5000000000000000)*1.0;
+                const real_t tmp_kernel_op_125 = tmp_kernel_op_124*(radRayVertex + tmp_kernel_op_123*tmp_kernel_op_47);
+                const real_t tmp_kernel_op_126 = tmp_kernel_op_118*tmp_kernel_op_121;
+                const real_t tmp_kernel_op_127 = tmp_kernel_op_124*(radRayVertex + tmp_kernel_op_123*tmp_kernel_op_47);
+                const real_t tmp_kernel_op_128 = tmp_kernel_op_116*tmp_kernel_op_118;
+                const real_t tmp_kernel_op_129 = abs_det_jac_affine_BLUE*0.16666666666666666*abs(-(tmp_kernel_op_117*tmp_kernel_op_127 + tmp_kernel_op_126*tmp_kernel_op_19)*(-tmp_kernel_op_119*tmp_kernel_op_125 + tmp_kernel_op_122*tmp_kernel_op_3) + (tmp_kernel_op_122*tmp_kernel_op_19 - tmp_kernel_op_127*tmp_kernel_op_128)*(tmp_kernel_op_125*tmp_kernel_op_128 + tmp_kernel_op_126*tmp_kernel_op_3));
+                const real_t tmp_kernel_op_130 = tmp_kernel_op_129*(-tmp_kernel_op_114*tmp_kernel_op_97 - tmp_kernel_op_115*tmp_kernel_op_97);
+                const real_t tmp_kernel_op_134 = tmp_kernel_op_38*(tmp_kernel_op_0 - 1.0);
+                const real_t tmp_kernel_op_135 = tmp_kernel_op_134*tmp_kernel_op_57;
+                const real_t tmp_kernel_op_136 = tmp_kernel_op_78*(tmp_kernel_op_59 - 1.0);
+                const real_t tmp_kernel_op_137 = tmp_kernel_op_136*tmp_kernel_op_93;
+                const real_t tmp_kernel_op_138 = tmp_kernel_op_114*(tmp_kernel_op_95 - 1.0);
+                const real_t tmp_kernel_op_139 = tmp_kernel_op_129*tmp_kernel_op_138;
+                const real_t tmp_kernel_op_140 = tmp_kernel_op_131*tmp_kernel_op_57;
+                const real_t tmp_kernel_op_141 = tmp_kernel_op_132*tmp_kernel_op_93;
+                const real_t tmp_kernel_op_142 = tmp_kernel_op_129*tmp_kernel_op_133;
+                const real_t tmp_kernel_op_143 = tmp_kernel_op_39*(tmp_kernel_op_1 - 1.0);
+                const real_t tmp_kernel_op_144 = tmp_kernel_op_143*tmp_kernel_op_57;
+                const real_t tmp_kernel_op_145 = tmp_kernel_op_79*(tmp_kernel_op_60 - 1.0);
+                const real_t tmp_kernel_op_146 = tmp_kernel_op_145*tmp_kernel_op_93;
+                const real_t tmp_kernel_op_147 = tmp_kernel_op_115*(tmp_kernel_op_96 - 1.0);
+                const real_t tmp_kernel_op_148 = tmp_kernel_op_129*tmp_kernel_op_147;
+                const real_t tmp_kernel_op_149 = tmp_kernel_op_0*tmp_kernel_op_39;
+                const real_t tmp_kernel_op_150 = tmp_kernel_op_1*tmp_kernel_op_38;
+                const real_t tmp_kernel_op_151 = tmp_kernel_op_57*(-tmp_kernel_op_149 - tmp_kernel_op_150);
+                const real_t tmp_kernel_op_152 = tmp_kernel_op_59*tmp_kernel_op_79;
+                const real_t tmp_kernel_op_153 = tmp_kernel_op_60*tmp_kernel_op_78;
+                const real_t tmp_kernel_op_154 = tmp_kernel_op_93*(-tmp_kernel_op_152 - tmp_kernel_op_153);
+                const real_t tmp_kernel_op_155 = tmp_kernel_op_115*tmp_kernel_op_95;
+                const real_t tmp_kernel_op_156 = tmp_kernel_op_114*tmp_kernel_op_96;
+                const real_t tmp_kernel_op_157 = tmp_kernel_op_129*(-tmp_kernel_op_155 - tmp_kernel_op_156);
+                const real_t tmp_kernel_op_158 = tmp_kernel_op_57*(tmp_kernel_op_150 - tmp_kernel_op_39*(-tmp_kernel_op_0 - 1.333333333333333));
+                const real_t tmp_kernel_op_159 = tmp_kernel_op_93*(tmp_kernel_op_153 - tmp_kernel_op_79*(-tmp_kernel_op_59 + 2.666666666666667));
+                const real_t tmp_kernel_op_160 = tmp_kernel_op_129*(-tmp_kernel_op_115*(-tmp_kernel_op_95 + 2.666666666666667) + tmp_kernel_op_156);
+                const real_t tmp_kernel_op_161 = tmp_kernel_op_57*(tmp_kernel_op_149 - tmp_kernel_op_38*(-tmp_kernel_op_1 + 2.666666666666667));
+                const real_t tmp_kernel_op_162 = tmp_kernel_op_93*(tmp_kernel_op_152 - tmp_kernel_op_78*(-tmp_kernel_op_60 - 1.333333333333333));
+                const real_t tmp_kernel_op_163 = tmp_kernel_op_129*(-tmp_kernel_op_114*(-tmp_kernel_op_96 + 2.666666666666667) + tmp_kernel_op_155);
+                const real_t elMatVec_0 = src_dof_0*(tmp_kernel_op_130*tmp_kernel_op_133 + tmp_kernel_op_131*tmp_kernel_op_58 + tmp_kernel_op_132*tmp_kernel_op_94) + src_dof_1*(tmp_kernel_op_130*0.16666666666666666 + tmp_kernel_op_58*0.16666666666666666 + tmp_kernel_op_94*0.66666666666666663) + src_dof_2*(tmp_kernel_op_130*0.16666666666666666 + tmp_kernel_op_58*0.66666666666666663 + tmp_kernel_op_94*0.16666666666666666);
+                const real_t elMatVec_1 = src_dof_0*(-tmp_kernel_op_134*tmp_kernel_op_140 - tmp_kernel_op_136*tmp_kernel_op_141 - tmp_kernel_op_138*tmp_kernel_op_142) + src_dof_1*(tmp_kernel_op_135*-0.16666666666666666 + tmp_kernel_op_137*-0.66666666666666663 + tmp_kernel_op_139*-0.16666666666666666) + src_dof_2*(tmp_kernel_op_135*-0.66666666666666663 + tmp_kernel_op_137*-0.16666666666666666 + tmp_kernel_op_139*-0.16666666666666666);
+                const real_t elMatVec_2 = src_dof_0*(-tmp_kernel_op_140*tmp_kernel_op_143 - tmp_kernel_op_141*tmp_kernel_op_145 - tmp_kernel_op_142*tmp_kernel_op_147) + src_dof_1*(tmp_kernel_op_144*-0.16666666666666666 + tmp_kernel_op_146*-0.66666666666666663 + tmp_kernel_op_148*-0.16666666666666666) + src_dof_2*(tmp_kernel_op_144*-0.66666666666666663 + tmp_kernel_op_146*-0.16666666666666666 + tmp_kernel_op_148*-0.16666666666666666);
+                const real_t elMatVec_3 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_151 + tmp_kernel_op_132*tmp_kernel_op_154 + tmp_kernel_op_133*tmp_kernel_op_157) + src_dof_1*(tmp_kernel_op_151*0.16666666666666666 + tmp_kernel_op_154*0.66666666666666663 + tmp_kernel_op_157*0.16666666666666666) + src_dof_2*(tmp_kernel_op_151*0.66666666666666663 + tmp_kernel_op_154*0.16666666666666666 + tmp_kernel_op_157*0.16666666666666666);
+                const real_t elMatVec_4 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_158 + tmp_kernel_op_132*tmp_kernel_op_159 + tmp_kernel_op_133*tmp_kernel_op_160) + src_dof_1*(tmp_kernel_op_158*0.16666666666666666 + tmp_kernel_op_159*0.66666666666666663 + tmp_kernel_op_160*0.16666666666666666) + src_dof_2*(tmp_kernel_op_158*0.66666666666666663 + tmp_kernel_op_159*0.16666666666666666 + tmp_kernel_op_160*0.16666666666666666);
+                const real_t elMatVec_5 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_161 + tmp_kernel_op_132*tmp_kernel_op_162 + tmp_kernel_op_133*tmp_kernel_op_163) + src_dof_1*(tmp_kernel_op_161*0.16666666666666666 + tmp_kernel_op_162*0.66666666666666663 + tmp_kernel_op_163*0.16666666666666666) + src_dof_2*(tmp_kernel_op_161*0.66666666666666663 + tmp_kernel_op_162*0.16666666666666666 + tmp_kernel_op_163*0.16666666666666666);
+                _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+                _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..30d72c44d9a3f551c7e4ab049168d9f74148dcc1
--- /dev/null
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp
@@ -0,0 +1,493 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P1ToP2ElementwiseGradientAnnulusMap_0_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P1ToP2ElementwiseGradientAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_kernel_op_0 = 0.66666666666666663;
+       const real_t tmp_kernel_op_1 = 2.6666666666666665;
+       const real_t tmp_kernel_op_2 = tmp_kernel_op_0 + tmp_kernel_op_1 - 3.0;
+       const real_t tmp_kernel_op_3 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_kernel_op_4 = -tmp_kernel_op_3;
+       const real_t tmp_kernel_op_18 = rayVertex_0 - refVertex_0;
+       const real_t tmp_kernel_op_19 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_kernel_op_20 = -tmp_kernel_op_19;
+       const real_t tmp_kernel_op_21 = rayVertex_1 - refVertex_1;
+       const real_t tmp_kernel_op_22 = radRayVertex - radRefVertex;
+       const real_t tmp_kernel_op_23 = -tmp_kernel_op_22*1.0 / (-tmp_kernel_op_18*tmp_kernel_op_20 + tmp_kernel_op_21*tmp_kernel_op_4);
+       const real_t tmp_kernel_op_24 = tmp_kernel_op_23*1.0;
+       const real_t tmp_kernel_op_28 = -rayVertex_1;
+       const real_t tmp_kernel_op_29 = -rayVertex_0;
+       const real_t tmp_kernel_op_47 = tmp_kernel_op_22*1.0 / (tmp_kernel_op_18*tmp_kernel_op_19 - tmp_kernel_op_21*tmp_kernel_op_3);
+       const real_t tmp_kernel_op_48 = tmp_kernel_op_47*1.0;
+       const real_t tmp_kernel_op_59 = 2.6666666666666665;
+       const real_t tmp_kernel_op_60 = 0.66666666666666663;
+       const real_t tmp_kernel_op_61 = tmp_kernel_op_59 + tmp_kernel_op_60 - 3.0;
+       const real_t tmp_kernel_op_95 = 0.66666666666666663;
+       const real_t tmp_kernel_op_96 = 0.66666666666666663;
+       const real_t tmp_kernel_op_97 = tmp_kernel_op_95 + tmp_kernel_op_96 - 3.0;
+       const real_t tmp_kernel_op_131 = 0.16666666666666674;
+       const real_t tmp_kernel_op_132 = 0.16666666666666671;
+       const real_t tmp_kernel_op_133 = 0.66666666666666674;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t src_dof_1 = _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_2 = _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t tmp_kernel_op_5 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_kernel_op_6 = -tmp_kernel_op_5;
+             const real_t tmp_kernel_op_7 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_kernel_op_8 = -tmp_kernel_op_7;
+             const real_t tmp_kernel_op_9 = p_affine_0_1 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+             const real_t tmp_kernel_op_10 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_kernel_op_11 = -tmp_kernel_op_10;
+             const real_t tmp_kernel_op_12 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_kernel_op_13 = -tmp_kernel_op_12;
+             const real_t tmp_kernel_op_14 = p_affine_0_0 + tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.66666666666666663;
+             const real_t tmp_kernel_op_15 = (tmp_kernel_op_14*tmp_kernel_op_14);
+             const real_t tmp_kernel_op_16 = (tmp_kernel_op_9*tmp_kernel_op_9);
+             const real_t tmp_kernel_op_17 = tmp_kernel_op_15 + tmp_kernel_op_16;
+             const real_t tmp_kernel_op_25 = pow(tmp_kernel_op_17, -0.50000000000000000)*tmp_kernel_op_24;
+             const real_t tmp_kernel_op_26 = tmp_kernel_op_25*tmp_kernel_op_9;
+             const real_t tmp_kernel_op_27 = pow(tmp_kernel_op_17, -1.5000000000000000);
+             const real_t tmp_kernel_op_30 = radRayVertex + tmp_kernel_op_23*(tmp_kernel_op_20*(tmp_kernel_op_14 + tmp_kernel_op_29) - tmp_kernel_op_4*(tmp_kernel_op_28 + tmp_kernel_op_9));
+             const real_t tmp_kernel_op_31 = tmp_kernel_op_15*tmp_kernel_op_27*tmp_kernel_op_30*1.0 - tmp_kernel_op_26*tmp_kernel_op_4;
+             const real_t tmp_kernel_op_32 = tmp_kernel_op_14*tmp_kernel_op_25;
+             const real_t tmp_kernel_op_33 = tmp_kernel_op_27*tmp_kernel_op_30*1.0;
+             const real_t tmp_kernel_op_34 = -tmp_kernel_op_14*tmp_kernel_op_33*tmp_kernel_op_9 + tmp_kernel_op_20*tmp_kernel_op_26;
+             const real_t tmp_kernel_op_35 = 1.0 / (tmp_kernel_op_31*(tmp_kernel_op_16*tmp_kernel_op_33 + tmp_kernel_op_20*tmp_kernel_op_32) - tmp_kernel_op_34*(-tmp_kernel_op_14*tmp_kernel_op_27*tmp_kernel_op_30*tmp_kernel_op_9 - tmp_kernel_op_32*tmp_kernel_op_4));
+             const real_t tmp_kernel_op_36 = tmp_kernel_op_31*tmp_kernel_op_35;
+             const real_t tmp_kernel_op_37 = -tmp_kernel_op_34*tmp_kernel_op_35;
+             const real_t tmp_kernel_op_38 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_36 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_37;
+             const real_t tmp_kernel_op_39 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_36 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_37;
+             const real_t tmp_kernel_op_40 = -p_affine_0_0;
+             const real_t tmp_kernel_op_41 = tmp_kernel_op_10*0.16666666666666666 + tmp_kernel_op_12*0.66666666666666663 + tmp_kernel_op_40;
+             const real_t tmp_kernel_op_42 = (tmp_kernel_op_41*tmp_kernel_op_41);
+             const real_t tmp_kernel_op_43 = -p_affine_0_1;
+             const real_t tmp_kernel_op_44 = tmp_kernel_op_43 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.66666666666666663;
+             const real_t tmp_kernel_op_45 = (tmp_kernel_op_44*tmp_kernel_op_44);
+             const real_t tmp_kernel_op_46 = tmp_kernel_op_42 + tmp_kernel_op_45;
+             const real_t tmp_kernel_op_49 = pow(tmp_kernel_op_46, -0.50000000000000000)*tmp_kernel_op_48;
+             const real_t tmp_kernel_op_50 = tmp_kernel_op_41*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_51 = -tmp_kernel_op_19*(rayVertex_0 + tmp_kernel_op_41) + tmp_kernel_op_3*(rayVertex_1 + tmp_kernel_op_44);
+             const real_t tmp_kernel_op_52 = pow(tmp_kernel_op_46, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_53 = tmp_kernel_op_52*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_51);
+             const real_t tmp_kernel_op_54 = tmp_kernel_op_44*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_55 = tmp_kernel_op_52*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_51);
+             const real_t tmp_kernel_op_56 = tmp_kernel_op_41*tmp_kernel_op_44;
+             const real_t tmp_kernel_op_57 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_50 - tmp_kernel_op_45*tmp_kernel_op_53)*(tmp_kernel_op_3*tmp_kernel_op_54 + tmp_kernel_op_42*tmp_kernel_op_55) - (tmp_kernel_op_19*tmp_kernel_op_54 + tmp_kernel_op_53*tmp_kernel_op_56)*(tmp_kernel_op_3*tmp_kernel_op_50 - tmp_kernel_op_55*tmp_kernel_op_56));
+             const real_t tmp_kernel_op_58 = tmp_kernel_op_57*(-tmp_kernel_op_2*tmp_kernel_op_38 - tmp_kernel_op_2*tmp_kernel_op_39);
+             const real_t tmp_kernel_op_62 = p_affine_0_1 + tmp_kernel_op_6*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_63 = p_affine_0_0 + tmp_kernel_op_11*0.66666666666666663 + tmp_kernel_op_13*0.16666666666666666;
+             const real_t tmp_kernel_op_64 = (tmp_kernel_op_63*tmp_kernel_op_63);
+             const real_t tmp_kernel_op_65 = (tmp_kernel_op_62*tmp_kernel_op_62);
+             const real_t tmp_kernel_op_66 = tmp_kernel_op_64 + tmp_kernel_op_65;
+             const real_t tmp_kernel_op_67 = tmp_kernel_op_24*pow(tmp_kernel_op_66, -0.50000000000000000);
+             const real_t tmp_kernel_op_68 = tmp_kernel_op_62*tmp_kernel_op_67;
+             const real_t tmp_kernel_op_69 = pow(tmp_kernel_op_66, -1.5000000000000000);
+             const real_t tmp_kernel_op_70 = radRayVertex + tmp_kernel_op_23*(tmp_kernel_op_20*(tmp_kernel_op_29 + tmp_kernel_op_63) - tmp_kernel_op_4*(tmp_kernel_op_28 + tmp_kernel_op_62));
+             const real_t tmp_kernel_op_71 = -tmp_kernel_op_4*tmp_kernel_op_68 + tmp_kernel_op_64*tmp_kernel_op_69*tmp_kernel_op_70*1.0;
+             const real_t tmp_kernel_op_72 = tmp_kernel_op_63*tmp_kernel_op_67;
+             const real_t tmp_kernel_op_73 = tmp_kernel_op_69*tmp_kernel_op_70*1.0;
+             const real_t tmp_kernel_op_74 = tmp_kernel_op_20*tmp_kernel_op_68 - tmp_kernel_op_62*tmp_kernel_op_63*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_75 = 1.0 / (tmp_kernel_op_71*(tmp_kernel_op_20*tmp_kernel_op_72 + tmp_kernel_op_65*tmp_kernel_op_73) - tmp_kernel_op_74*(-tmp_kernel_op_4*tmp_kernel_op_72 - tmp_kernel_op_62*tmp_kernel_op_63*tmp_kernel_op_69*tmp_kernel_op_70));
+             const real_t tmp_kernel_op_76 = tmp_kernel_op_71*tmp_kernel_op_75;
+             const real_t tmp_kernel_op_77 = -tmp_kernel_op_74*tmp_kernel_op_75;
+             const real_t tmp_kernel_op_78 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_76 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_79 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_76 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_80 = tmp_kernel_op_10*0.66666666666666663 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_40;
+             const real_t tmp_kernel_op_81 = (tmp_kernel_op_80*tmp_kernel_op_80);
+             const real_t tmp_kernel_op_82 = tmp_kernel_op_43 + tmp_kernel_op_5*0.66666666666666663 + tmp_kernel_op_7*0.16666666666666666;
+             const real_t tmp_kernel_op_83 = (tmp_kernel_op_82*tmp_kernel_op_82);
+             const real_t tmp_kernel_op_84 = tmp_kernel_op_81 + tmp_kernel_op_83;
+             const real_t tmp_kernel_op_85 = tmp_kernel_op_48*pow(tmp_kernel_op_84, -0.50000000000000000);
+             const real_t tmp_kernel_op_86 = tmp_kernel_op_80*tmp_kernel_op_85;
+             const real_t tmp_kernel_op_87 = -tmp_kernel_op_19*(rayVertex_0 + tmp_kernel_op_80) + tmp_kernel_op_3*(rayVertex_1 + tmp_kernel_op_82);
+             const real_t tmp_kernel_op_88 = pow(tmp_kernel_op_84, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_89 = tmp_kernel_op_88*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_87);
+             const real_t tmp_kernel_op_90 = tmp_kernel_op_82*tmp_kernel_op_85;
+             const real_t tmp_kernel_op_91 = tmp_kernel_op_88*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_87);
+             const real_t tmp_kernel_op_92 = tmp_kernel_op_80*tmp_kernel_op_82;
+             const real_t tmp_kernel_op_93 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_86 - tmp_kernel_op_83*tmp_kernel_op_89)*(tmp_kernel_op_3*tmp_kernel_op_90 + tmp_kernel_op_81*tmp_kernel_op_91) - (tmp_kernel_op_19*tmp_kernel_op_90 + tmp_kernel_op_89*tmp_kernel_op_92)*(tmp_kernel_op_3*tmp_kernel_op_86 - tmp_kernel_op_91*tmp_kernel_op_92));
+             const real_t tmp_kernel_op_94 = tmp_kernel_op_93*(-tmp_kernel_op_61*tmp_kernel_op_78 - tmp_kernel_op_61*tmp_kernel_op_79);
+             const real_t tmp_kernel_op_98 = p_affine_0_1 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_99 = p_affine_0_0 + tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.16666666666666666;
+             const real_t tmp_kernel_op_100 = (tmp_kernel_op_99*tmp_kernel_op_99);
+             const real_t tmp_kernel_op_101 = (tmp_kernel_op_98*tmp_kernel_op_98);
+             const real_t tmp_kernel_op_102 = tmp_kernel_op_100 + tmp_kernel_op_101;
+             const real_t tmp_kernel_op_103 = pow(tmp_kernel_op_102, -0.50000000000000000)*tmp_kernel_op_24;
+             const real_t tmp_kernel_op_104 = tmp_kernel_op_103*tmp_kernel_op_98;
+             const real_t tmp_kernel_op_105 = pow(tmp_kernel_op_102, -1.5000000000000000);
+             const real_t tmp_kernel_op_106 = radRayVertex + tmp_kernel_op_23*(tmp_kernel_op_20*(tmp_kernel_op_29 + tmp_kernel_op_99) - tmp_kernel_op_4*(tmp_kernel_op_28 + tmp_kernel_op_98));
+             const real_t tmp_kernel_op_107 = tmp_kernel_op_100*tmp_kernel_op_105*tmp_kernel_op_106*1.0 - tmp_kernel_op_104*tmp_kernel_op_4;
+             const real_t tmp_kernel_op_108 = tmp_kernel_op_103*tmp_kernel_op_99;
+             const real_t tmp_kernel_op_109 = tmp_kernel_op_105*tmp_kernel_op_106*1.0;
+             const real_t tmp_kernel_op_110 = tmp_kernel_op_104*tmp_kernel_op_20 - tmp_kernel_op_109*tmp_kernel_op_98*tmp_kernel_op_99;
+             const real_t tmp_kernel_op_111 = 1.0 / (tmp_kernel_op_107*(tmp_kernel_op_101*tmp_kernel_op_109 + tmp_kernel_op_108*tmp_kernel_op_20) - tmp_kernel_op_110*(-tmp_kernel_op_105*tmp_kernel_op_106*tmp_kernel_op_98*tmp_kernel_op_99 - tmp_kernel_op_108*tmp_kernel_op_4));
+             const real_t tmp_kernel_op_112 = tmp_kernel_op_107*tmp_kernel_op_111;
+             const real_t tmp_kernel_op_113 = -tmp_kernel_op_110*tmp_kernel_op_111;
+             const real_t tmp_kernel_op_114 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_112 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_113;
+             const real_t tmp_kernel_op_115 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_112 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_113;
+             const real_t tmp_kernel_op_116 = tmp_kernel_op_10*0.16666666666666666 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_40;
+             const real_t tmp_kernel_op_117 = (tmp_kernel_op_116*tmp_kernel_op_116);
+             const real_t tmp_kernel_op_118 = tmp_kernel_op_43 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.16666666666666666;
+             const real_t tmp_kernel_op_119 = (tmp_kernel_op_118*tmp_kernel_op_118);
+             const real_t tmp_kernel_op_120 = tmp_kernel_op_117 + tmp_kernel_op_119;
+             const real_t tmp_kernel_op_121 = pow(tmp_kernel_op_120, -0.50000000000000000)*tmp_kernel_op_48;
+             const real_t tmp_kernel_op_122 = tmp_kernel_op_116*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_123 = -tmp_kernel_op_19*(rayVertex_0 + tmp_kernel_op_116) + tmp_kernel_op_3*(rayVertex_1 + tmp_kernel_op_118);
+             const real_t tmp_kernel_op_124 = pow(tmp_kernel_op_120, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_125 = tmp_kernel_op_124*(radRayVertex + tmp_kernel_op_123*tmp_kernel_op_47);
+             const real_t tmp_kernel_op_126 = tmp_kernel_op_118*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_127 = tmp_kernel_op_124*(radRayVertex + tmp_kernel_op_123*tmp_kernel_op_47);
+             const real_t tmp_kernel_op_128 = tmp_kernel_op_116*tmp_kernel_op_118;
+             const real_t tmp_kernel_op_129 = abs_det_jac_affine_GRAY*0.16666666666666666*abs(-(tmp_kernel_op_117*tmp_kernel_op_127 + tmp_kernel_op_126*tmp_kernel_op_3)*(-tmp_kernel_op_119*tmp_kernel_op_125 + tmp_kernel_op_122*tmp_kernel_op_19) + (tmp_kernel_op_122*tmp_kernel_op_3 - tmp_kernel_op_127*tmp_kernel_op_128)*(tmp_kernel_op_125*tmp_kernel_op_128 + tmp_kernel_op_126*tmp_kernel_op_19));
+             const real_t tmp_kernel_op_130 = tmp_kernel_op_129*(-tmp_kernel_op_114*tmp_kernel_op_97 - tmp_kernel_op_115*tmp_kernel_op_97);
+             const real_t tmp_kernel_op_134 = tmp_kernel_op_38*(tmp_kernel_op_0 - 1.0);
+             const real_t tmp_kernel_op_135 = tmp_kernel_op_134*tmp_kernel_op_57;
+             const real_t tmp_kernel_op_136 = tmp_kernel_op_78*(tmp_kernel_op_59 - 1.0);
+             const real_t tmp_kernel_op_137 = tmp_kernel_op_136*tmp_kernel_op_93;
+             const real_t tmp_kernel_op_138 = tmp_kernel_op_114*(tmp_kernel_op_95 - 1.0);
+             const real_t tmp_kernel_op_139 = tmp_kernel_op_129*tmp_kernel_op_138;
+             const real_t tmp_kernel_op_140 = tmp_kernel_op_131*tmp_kernel_op_57;
+             const real_t tmp_kernel_op_141 = tmp_kernel_op_132*tmp_kernel_op_93;
+             const real_t tmp_kernel_op_142 = tmp_kernel_op_129*tmp_kernel_op_133;
+             const real_t tmp_kernel_op_143 = tmp_kernel_op_39*(tmp_kernel_op_1 - 1.0);
+             const real_t tmp_kernel_op_144 = tmp_kernel_op_143*tmp_kernel_op_57;
+             const real_t tmp_kernel_op_145 = tmp_kernel_op_79*(tmp_kernel_op_60 - 1.0);
+             const real_t tmp_kernel_op_146 = tmp_kernel_op_145*tmp_kernel_op_93;
+             const real_t tmp_kernel_op_147 = tmp_kernel_op_115*(tmp_kernel_op_96 - 1.0);
+             const real_t tmp_kernel_op_148 = tmp_kernel_op_129*tmp_kernel_op_147;
+             const real_t tmp_kernel_op_149 = tmp_kernel_op_0*tmp_kernel_op_39;
+             const real_t tmp_kernel_op_150 = tmp_kernel_op_1*tmp_kernel_op_38;
+             const real_t tmp_kernel_op_151 = tmp_kernel_op_57*(-tmp_kernel_op_149 - tmp_kernel_op_150);
+             const real_t tmp_kernel_op_152 = tmp_kernel_op_59*tmp_kernel_op_79;
+             const real_t tmp_kernel_op_153 = tmp_kernel_op_60*tmp_kernel_op_78;
+             const real_t tmp_kernel_op_154 = tmp_kernel_op_93*(-tmp_kernel_op_152 - tmp_kernel_op_153);
+             const real_t tmp_kernel_op_155 = tmp_kernel_op_115*tmp_kernel_op_95;
+             const real_t tmp_kernel_op_156 = tmp_kernel_op_114*tmp_kernel_op_96;
+             const real_t tmp_kernel_op_157 = tmp_kernel_op_129*(-tmp_kernel_op_155 - tmp_kernel_op_156);
+             const real_t tmp_kernel_op_158 = tmp_kernel_op_57*(tmp_kernel_op_150 - tmp_kernel_op_39*(-tmp_kernel_op_0 - 1.333333333333333));
+             const real_t tmp_kernel_op_159 = tmp_kernel_op_93*(tmp_kernel_op_153 - tmp_kernel_op_79*(-tmp_kernel_op_59 + 2.666666666666667));
+             const real_t tmp_kernel_op_160 = tmp_kernel_op_129*(-tmp_kernel_op_115*(-tmp_kernel_op_95 + 2.666666666666667) + tmp_kernel_op_156);
+             const real_t tmp_kernel_op_161 = tmp_kernel_op_57*(tmp_kernel_op_149 - tmp_kernel_op_38*(-tmp_kernel_op_1 + 2.666666666666667));
+             const real_t tmp_kernel_op_162 = tmp_kernel_op_93*(tmp_kernel_op_152 - tmp_kernel_op_78*(-tmp_kernel_op_60 - 1.333333333333333));
+             const real_t tmp_kernel_op_163 = tmp_kernel_op_129*(-tmp_kernel_op_114*(-tmp_kernel_op_96 + 2.666666666666667) + tmp_kernel_op_155);
+             const real_t elMatVec_0 = src_dof_0*(tmp_kernel_op_130*tmp_kernel_op_133 + tmp_kernel_op_131*tmp_kernel_op_58 + tmp_kernel_op_132*tmp_kernel_op_94) + src_dof_1*(tmp_kernel_op_130*0.16666666666666666 + tmp_kernel_op_58*0.16666666666666666 + tmp_kernel_op_94*0.66666666666666663) + src_dof_2*(tmp_kernel_op_130*0.16666666666666666 + tmp_kernel_op_58*0.66666666666666663 + tmp_kernel_op_94*0.16666666666666666);
+             const real_t elMatVec_1 = src_dof_0*(-tmp_kernel_op_134*tmp_kernel_op_140 - tmp_kernel_op_136*tmp_kernel_op_141 - tmp_kernel_op_138*tmp_kernel_op_142) + src_dof_1*(tmp_kernel_op_135*-0.16666666666666666 + tmp_kernel_op_137*-0.66666666666666663 + tmp_kernel_op_139*-0.16666666666666666) + src_dof_2*(tmp_kernel_op_135*-0.66666666666666663 + tmp_kernel_op_137*-0.16666666666666666 + tmp_kernel_op_139*-0.16666666666666666);
+             const real_t elMatVec_2 = src_dof_0*(-tmp_kernel_op_140*tmp_kernel_op_143 - tmp_kernel_op_141*tmp_kernel_op_145 - tmp_kernel_op_142*tmp_kernel_op_147) + src_dof_1*(tmp_kernel_op_144*-0.16666666666666666 + tmp_kernel_op_146*-0.66666666666666663 + tmp_kernel_op_148*-0.16666666666666666) + src_dof_2*(tmp_kernel_op_144*-0.66666666666666663 + tmp_kernel_op_146*-0.16666666666666666 + tmp_kernel_op_148*-0.16666666666666666);
+             const real_t elMatVec_3 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_151 + tmp_kernel_op_132*tmp_kernel_op_154 + tmp_kernel_op_133*tmp_kernel_op_157) + src_dof_1*(tmp_kernel_op_151*0.16666666666666666 + tmp_kernel_op_154*0.66666666666666663 + tmp_kernel_op_157*0.16666666666666666) + src_dof_2*(tmp_kernel_op_151*0.66666666666666663 + tmp_kernel_op_154*0.16666666666666666 + tmp_kernel_op_157*0.16666666666666666);
+             const real_t elMatVec_4 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_158 + tmp_kernel_op_132*tmp_kernel_op_159 + tmp_kernel_op_133*tmp_kernel_op_160) + src_dof_1*(tmp_kernel_op_158*0.16666666666666666 + tmp_kernel_op_159*0.66666666666666663 + tmp_kernel_op_160*0.16666666666666666) + src_dof_2*(tmp_kernel_op_158*0.66666666666666663 + tmp_kernel_op_159*0.16666666666666666 + tmp_kernel_op_160*0.16666666666666666);
+             const real_t elMatVec_5 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_161 + tmp_kernel_op_132*tmp_kernel_op_162 + tmp_kernel_op_133*tmp_kernel_op_163) + src_dof_1*(tmp_kernel_op_161*0.16666666666666666 + tmp_kernel_op_162*0.66666666666666663 + tmp_kernel_op_163*0.16666666666666666) + src_dof_2*(tmp_kernel_op_161*0.66666666666666663 + tmp_kernel_op_162*0.16666666666666666 + tmp_kernel_op_163*0.16666666666666666);
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_1 = _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_2 = _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t tmp_kernel_op_5 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_kernel_op_6 = -tmp_kernel_op_5;
+             const real_t tmp_kernel_op_7 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_kernel_op_8 = -tmp_kernel_op_7;
+             const real_t tmp_kernel_op_9 = p_affine_0_1 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+             const real_t tmp_kernel_op_10 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_kernel_op_11 = -tmp_kernel_op_10;
+             const real_t tmp_kernel_op_12 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_kernel_op_13 = -tmp_kernel_op_12;
+             const real_t tmp_kernel_op_14 = p_affine_0_0 + tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.66666666666666663;
+             const real_t tmp_kernel_op_15 = (tmp_kernel_op_14*tmp_kernel_op_14);
+             const real_t tmp_kernel_op_16 = (tmp_kernel_op_9*tmp_kernel_op_9);
+             const real_t tmp_kernel_op_17 = tmp_kernel_op_15 + tmp_kernel_op_16;
+             const real_t tmp_kernel_op_25 = pow(tmp_kernel_op_17, -0.50000000000000000)*tmp_kernel_op_24;
+             const real_t tmp_kernel_op_26 = tmp_kernel_op_25*tmp_kernel_op_9;
+             const real_t tmp_kernel_op_27 = pow(tmp_kernel_op_17, -1.5000000000000000);
+             const real_t tmp_kernel_op_30 = radRayVertex + tmp_kernel_op_23*(tmp_kernel_op_20*(tmp_kernel_op_14 + tmp_kernel_op_29) - tmp_kernel_op_4*(tmp_kernel_op_28 + tmp_kernel_op_9));
+             const real_t tmp_kernel_op_31 = tmp_kernel_op_15*tmp_kernel_op_27*tmp_kernel_op_30*1.0 - tmp_kernel_op_26*tmp_kernel_op_4;
+             const real_t tmp_kernel_op_32 = tmp_kernel_op_14*tmp_kernel_op_25;
+             const real_t tmp_kernel_op_33 = tmp_kernel_op_27*tmp_kernel_op_30*1.0;
+             const real_t tmp_kernel_op_34 = -tmp_kernel_op_14*tmp_kernel_op_33*tmp_kernel_op_9 + tmp_kernel_op_20*tmp_kernel_op_26;
+             const real_t tmp_kernel_op_35 = 1.0 / (tmp_kernel_op_31*(tmp_kernel_op_16*tmp_kernel_op_33 + tmp_kernel_op_20*tmp_kernel_op_32) - tmp_kernel_op_34*(-tmp_kernel_op_14*tmp_kernel_op_27*tmp_kernel_op_30*tmp_kernel_op_9 - tmp_kernel_op_32*tmp_kernel_op_4));
+             const real_t tmp_kernel_op_36 = tmp_kernel_op_31*tmp_kernel_op_35;
+             const real_t tmp_kernel_op_37 = -tmp_kernel_op_34*tmp_kernel_op_35;
+             const real_t tmp_kernel_op_38 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_36 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_37;
+             const real_t tmp_kernel_op_39 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_36 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_37;
+             const real_t tmp_kernel_op_40 = -p_affine_0_0;
+             const real_t tmp_kernel_op_41 = tmp_kernel_op_10*0.16666666666666666 + tmp_kernel_op_12*0.66666666666666663 + tmp_kernel_op_40;
+             const real_t tmp_kernel_op_42 = (tmp_kernel_op_41*tmp_kernel_op_41);
+             const real_t tmp_kernel_op_43 = -p_affine_0_1;
+             const real_t tmp_kernel_op_44 = tmp_kernel_op_43 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.66666666666666663;
+             const real_t tmp_kernel_op_45 = (tmp_kernel_op_44*tmp_kernel_op_44);
+             const real_t tmp_kernel_op_46 = tmp_kernel_op_42 + tmp_kernel_op_45;
+             const real_t tmp_kernel_op_49 = pow(tmp_kernel_op_46, -0.50000000000000000)*tmp_kernel_op_48;
+             const real_t tmp_kernel_op_50 = tmp_kernel_op_41*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_51 = -tmp_kernel_op_19*(rayVertex_0 + tmp_kernel_op_41) + tmp_kernel_op_3*(rayVertex_1 + tmp_kernel_op_44);
+             const real_t tmp_kernel_op_52 = pow(tmp_kernel_op_46, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_53 = tmp_kernel_op_52*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_51);
+             const real_t tmp_kernel_op_54 = tmp_kernel_op_44*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_55 = tmp_kernel_op_52*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_51);
+             const real_t tmp_kernel_op_56 = tmp_kernel_op_41*tmp_kernel_op_44;
+             const real_t tmp_kernel_op_57 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_50 - tmp_kernel_op_45*tmp_kernel_op_53)*(tmp_kernel_op_3*tmp_kernel_op_54 + tmp_kernel_op_42*tmp_kernel_op_55) - (tmp_kernel_op_19*tmp_kernel_op_54 + tmp_kernel_op_53*tmp_kernel_op_56)*(tmp_kernel_op_3*tmp_kernel_op_50 - tmp_kernel_op_55*tmp_kernel_op_56));
+             const real_t tmp_kernel_op_58 = tmp_kernel_op_57*(-tmp_kernel_op_2*tmp_kernel_op_38 - tmp_kernel_op_2*tmp_kernel_op_39);
+             const real_t tmp_kernel_op_62 = p_affine_0_1 + tmp_kernel_op_6*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_63 = p_affine_0_0 + tmp_kernel_op_11*0.66666666666666663 + tmp_kernel_op_13*0.16666666666666666;
+             const real_t tmp_kernel_op_64 = (tmp_kernel_op_63*tmp_kernel_op_63);
+             const real_t tmp_kernel_op_65 = (tmp_kernel_op_62*tmp_kernel_op_62);
+             const real_t tmp_kernel_op_66 = tmp_kernel_op_64 + tmp_kernel_op_65;
+             const real_t tmp_kernel_op_67 = tmp_kernel_op_24*pow(tmp_kernel_op_66, -0.50000000000000000);
+             const real_t tmp_kernel_op_68 = tmp_kernel_op_62*tmp_kernel_op_67;
+             const real_t tmp_kernel_op_69 = pow(tmp_kernel_op_66, -1.5000000000000000);
+             const real_t tmp_kernel_op_70 = radRayVertex + tmp_kernel_op_23*(tmp_kernel_op_20*(tmp_kernel_op_29 + tmp_kernel_op_63) - tmp_kernel_op_4*(tmp_kernel_op_28 + tmp_kernel_op_62));
+             const real_t tmp_kernel_op_71 = -tmp_kernel_op_4*tmp_kernel_op_68 + tmp_kernel_op_64*tmp_kernel_op_69*tmp_kernel_op_70*1.0;
+             const real_t tmp_kernel_op_72 = tmp_kernel_op_63*tmp_kernel_op_67;
+             const real_t tmp_kernel_op_73 = tmp_kernel_op_69*tmp_kernel_op_70*1.0;
+             const real_t tmp_kernel_op_74 = tmp_kernel_op_20*tmp_kernel_op_68 - tmp_kernel_op_62*tmp_kernel_op_63*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_75 = 1.0 / (tmp_kernel_op_71*(tmp_kernel_op_20*tmp_kernel_op_72 + tmp_kernel_op_65*tmp_kernel_op_73) - tmp_kernel_op_74*(-tmp_kernel_op_4*tmp_kernel_op_72 - tmp_kernel_op_62*tmp_kernel_op_63*tmp_kernel_op_69*tmp_kernel_op_70));
+             const real_t tmp_kernel_op_76 = tmp_kernel_op_71*tmp_kernel_op_75;
+             const real_t tmp_kernel_op_77 = -tmp_kernel_op_74*tmp_kernel_op_75;
+             const real_t tmp_kernel_op_78 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_76 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_79 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_76 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_80 = tmp_kernel_op_10*0.66666666666666663 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_40;
+             const real_t tmp_kernel_op_81 = (tmp_kernel_op_80*tmp_kernel_op_80);
+             const real_t tmp_kernel_op_82 = tmp_kernel_op_43 + tmp_kernel_op_5*0.66666666666666663 + tmp_kernel_op_7*0.16666666666666666;
+             const real_t tmp_kernel_op_83 = (tmp_kernel_op_82*tmp_kernel_op_82);
+             const real_t tmp_kernel_op_84 = tmp_kernel_op_81 + tmp_kernel_op_83;
+             const real_t tmp_kernel_op_85 = tmp_kernel_op_48*pow(tmp_kernel_op_84, -0.50000000000000000);
+             const real_t tmp_kernel_op_86 = tmp_kernel_op_80*tmp_kernel_op_85;
+             const real_t tmp_kernel_op_87 = -tmp_kernel_op_19*(rayVertex_0 + tmp_kernel_op_80) + tmp_kernel_op_3*(rayVertex_1 + tmp_kernel_op_82);
+             const real_t tmp_kernel_op_88 = pow(tmp_kernel_op_84, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_89 = tmp_kernel_op_88*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_87);
+             const real_t tmp_kernel_op_90 = tmp_kernel_op_82*tmp_kernel_op_85;
+             const real_t tmp_kernel_op_91 = tmp_kernel_op_88*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_87);
+             const real_t tmp_kernel_op_92 = tmp_kernel_op_80*tmp_kernel_op_82;
+             const real_t tmp_kernel_op_93 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_86 - tmp_kernel_op_83*tmp_kernel_op_89)*(tmp_kernel_op_3*tmp_kernel_op_90 + tmp_kernel_op_81*tmp_kernel_op_91) - (tmp_kernel_op_19*tmp_kernel_op_90 + tmp_kernel_op_89*tmp_kernel_op_92)*(tmp_kernel_op_3*tmp_kernel_op_86 - tmp_kernel_op_91*tmp_kernel_op_92));
+             const real_t tmp_kernel_op_94 = tmp_kernel_op_93*(-tmp_kernel_op_61*tmp_kernel_op_78 - tmp_kernel_op_61*tmp_kernel_op_79);
+             const real_t tmp_kernel_op_98 = p_affine_0_1 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_99 = p_affine_0_0 + tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.16666666666666666;
+             const real_t tmp_kernel_op_100 = (tmp_kernel_op_99*tmp_kernel_op_99);
+             const real_t tmp_kernel_op_101 = (tmp_kernel_op_98*tmp_kernel_op_98);
+             const real_t tmp_kernel_op_102 = tmp_kernel_op_100 + tmp_kernel_op_101;
+             const real_t tmp_kernel_op_103 = pow(tmp_kernel_op_102, -0.50000000000000000)*tmp_kernel_op_24;
+             const real_t tmp_kernel_op_104 = tmp_kernel_op_103*tmp_kernel_op_98;
+             const real_t tmp_kernel_op_105 = pow(tmp_kernel_op_102, -1.5000000000000000);
+             const real_t tmp_kernel_op_106 = radRayVertex + tmp_kernel_op_23*(tmp_kernel_op_20*(tmp_kernel_op_29 + tmp_kernel_op_99) - tmp_kernel_op_4*(tmp_kernel_op_28 + tmp_kernel_op_98));
+             const real_t tmp_kernel_op_107 = tmp_kernel_op_100*tmp_kernel_op_105*tmp_kernel_op_106*1.0 - tmp_kernel_op_104*tmp_kernel_op_4;
+             const real_t tmp_kernel_op_108 = tmp_kernel_op_103*tmp_kernel_op_99;
+             const real_t tmp_kernel_op_109 = tmp_kernel_op_105*tmp_kernel_op_106*1.0;
+             const real_t tmp_kernel_op_110 = tmp_kernel_op_104*tmp_kernel_op_20 - tmp_kernel_op_109*tmp_kernel_op_98*tmp_kernel_op_99;
+             const real_t tmp_kernel_op_111 = 1.0 / (tmp_kernel_op_107*(tmp_kernel_op_101*tmp_kernel_op_109 + tmp_kernel_op_108*tmp_kernel_op_20) - tmp_kernel_op_110*(-tmp_kernel_op_105*tmp_kernel_op_106*tmp_kernel_op_98*tmp_kernel_op_99 - tmp_kernel_op_108*tmp_kernel_op_4));
+             const real_t tmp_kernel_op_112 = tmp_kernel_op_107*tmp_kernel_op_111;
+             const real_t tmp_kernel_op_113 = -tmp_kernel_op_110*tmp_kernel_op_111;
+             const real_t tmp_kernel_op_114 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_112 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_113;
+             const real_t tmp_kernel_op_115 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_112 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_113;
+             const real_t tmp_kernel_op_116 = tmp_kernel_op_10*0.16666666666666666 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_40;
+             const real_t tmp_kernel_op_117 = (tmp_kernel_op_116*tmp_kernel_op_116);
+             const real_t tmp_kernel_op_118 = tmp_kernel_op_43 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.16666666666666666;
+             const real_t tmp_kernel_op_119 = (tmp_kernel_op_118*tmp_kernel_op_118);
+             const real_t tmp_kernel_op_120 = tmp_kernel_op_117 + tmp_kernel_op_119;
+             const real_t tmp_kernel_op_121 = pow(tmp_kernel_op_120, -0.50000000000000000)*tmp_kernel_op_48;
+             const real_t tmp_kernel_op_122 = tmp_kernel_op_116*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_123 = -tmp_kernel_op_19*(rayVertex_0 + tmp_kernel_op_116) + tmp_kernel_op_3*(rayVertex_1 + tmp_kernel_op_118);
+             const real_t tmp_kernel_op_124 = pow(tmp_kernel_op_120, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_125 = tmp_kernel_op_124*(radRayVertex + tmp_kernel_op_123*tmp_kernel_op_47);
+             const real_t tmp_kernel_op_126 = tmp_kernel_op_118*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_127 = tmp_kernel_op_124*(radRayVertex + tmp_kernel_op_123*tmp_kernel_op_47);
+             const real_t tmp_kernel_op_128 = tmp_kernel_op_116*tmp_kernel_op_118;
+             const real_t tmp_kernel_op_129 = abs_det_jac_affine_BLUE*0.16666666666666666*abs(-(tmp_kernel_op_117*tmp_kernel_op_127 + tmp_kernel_op_126*tmp_kernel_op_3)*(-tmp_kernel_op_119*tmp_kernel_op_125 + tmp_kernel_op_122*tmp_kernel_op_19) + (tmp_kernel_op_122*tmp_kernel_op_3 - tmp_kernel_op_127*tmp_kernel_op_128)*(tmp_kernel_op_125*tmp_kernel_op_128 + tmp_kernel_op_126*tmp_kernel_op_19));
+             const real_t tmp_kernel_op_130 = tmp_kernel_op_129*(-tmp_kernel_op_114*tmp_kernel_op_97 - tmp_kernel_op_115*tmp_kernel_op_97);
+             const real_t tmp_kernel_op_134 = tmp_kernel_op_38*(tmp_kernel_op_0 - 1.0);
+             const real_t tmp_kernel_op_135 = tmp_kernel_op_134*tmp_kernel_op_57;
+             const real_t tmp_kernel_op_136 = tmp_kernel_op_78*(tmp_kernel_op_59 - 1.0);
+             const real_t tmp_kernel_op_137 = tmp_kernel_op_136*tmp_kernel_op_93;
+             const real_t tmp_kernel_op_138 = tmp_kernel_op_114*(tmp_kernel_op_95 - 1.0);
+             const real_t tmp_kernel_op_139 = tmp_kernel_op_129*tmp_kernel_op_138;
+             const real_t tmp_kernel_op_140 = tmp_kernel_op_131*tmp_kernel_op_57;
+             const real_t tmp_kernel_op_141 = tmp_kernel_op_132*tmp_kernel_op_93;
+             const real_t tmp_kernel_op_142 = tmp_kernel_op_129*tmp_kernel_op_133;
+             const real_t tmp_kernel_op_143 = tmp_kernel_op_39*(tmp_kernel_op_1 - 1.0);
+             const real_t tmp_kernel_op_144 = tmp_kernel_op_143*tmp_kernel_op_57;
+             const real_t tmp_kernel_op_145 = tmp_kernel_op_79*(tmp_kernel_op_60 - 1.0);
+             const real_t tmp_kernel_op_146 = tmp_kernel_op_145*tmp_kernel_op_93;
+             const real_t tmp_kernel_op_147 = tmp_kernel_op_115*(tmp_kernel_op_96 - 1.0);
+             const real_t tmp_kernel_op_148 = tmp_kernel_op_129*tmp_kernel_op_147;
+             const real_t tmp_kernel_op_149 = tmp_kernel_op_0*tmp_kernel_op_39;
+             const real_t tmp_kernel_op_150 = tmp_kernel_op_1*tmp_kernel_op_38;
+             const real_t tmp_kernel_op_151 = tmp_kernel_op_57*(-tmp_kernel_op_149 - tmp_kernel_op_150);
+             const real_t tmp_kernel_op_152 = tmp_kernel_op_59*tmp_kernel_op_79;
+             const real_t tmp_kernel_op_153 = tmp_kernel_op_60*tmp_kernel_op_78;
+             const real_t tmp_kernel_op_154 = tmp_kernel_op_93*(-tmp_kernel_op_152 - tmp_kernel_op_153);
+             const real_t tmp_kernel_op_155 = tmp_kernel_op_115*tmp_kernel_op_95;
+             const real_t tmp_kernel_op_156 = tmp_kernel_op_114*tmp_kernel_op_96;
+             const real_t tmp_kernel_op_157 = tmp_kernel_op_129*(-tmp_kernel_op_155 - tmp_kernel_op_156);
+             const real_t tmp_kernel_op_158 = tmp_kernel_op_57*(tmp_kernel_op_150 - tmp_kernel_op_39*(-tmp_kernel_op_0 - 1.333333333333333));
+             const real_t tmp_kernel_op_159 = tmp_kernel_op_93*(tmp_kernel_op_153 - tmp_kernel_op_79*(-tmp_kernel_op_59 + 2.666666666666667));
+             const real_t tmp_kernel_op_160 = tmp_kernel_op_129*(-tmp_kernel_op_115*(-tmp_kernel_op_95 + 2.666666666666667) + tmp_kernel_op_156);
+             const real_t tmp_kernel_op_161 = tmp_kernel_op_57*(tmp_kernel_op_149 - tmp_kernel_op_38*(-tmp_kernel_op_1 + 2.666666666666667));
+             const real_t tmp_kernel_op_162 = tmp_kernel_op_93*(tmp_kernel_op_152 - tmp_kernel_op_78*(-tmp_kernel_op_60 - 1.333333333333333));
+             const real_t tmp_kernel_op_163 = tmp_kernel_op_129*(-tmp_kernel_op_114*(-tmp_kernel_op_96 + 2.666666666666667) + tmp_kernel_op_155);
+             const real_t elMatVec_0 = src_dof_0*(tmp_kernel_op_130*tmp_kernel_op_133 + tmp_kernel_op_131*tmp_kernel_op_58 + tmp_kernel_op_132*tmp_kernel_op_94) + src_dof_1*(tmp_kernel_op_130*0.16666666666666666 + tmp_kernel_op_58*0.16666666666666666 + tmp_kernel_op_94*0.66666666666666663) + src_dof_2*(tmp_kernel_op_130*0.16666666666666666 + tmp_kernel_op_58*0.66666666666666663 + tmp_kernel_op_94*0.16666666666666666);
+             const real_t elMatVec_1 = src_dof_0*(-tmp_kernel_op_134*tmp_kernel_op_140 - tmp_kernel_op_136*tmp_kernel_op_141 - tmp_kernel_op_138*tmp_kernel_op_142) + src_dof_1*(tmp_kernel_op_135*-0.16666666666666666 + tmp_kernel_op_137*-0.66666666666666663 + tmp_kernel_op_139*-0.16666666666666666) + src_dof_2*(tmp_kernel_op_135*-0.66666666666666663 + tmp_kernel_op_137*-0.16666666666666666 + tmp_kernel_op_139*-0.16666666666666666);
+             const real_t elMatVec_2 = src_dof_0*(-tmp_kernel_op_140*tmp_kernel_op_143 - tmp_kernel_op_141*tmp_kernel_op_145 - tmp_kernel_op_142*tmp_kernel_op_147) + src_dof_1*(tmp_kernel_op_144*-0.16666666666666666 + tmp_kernel_op_146*-0.66666666666666663 + tmp_kernel_op_148*-0.16666666666666666) + src_dof_2*(tmp_kernel_op_144*-0.66666666666666663 + tmp_kernel_op_146*-0.16666666666666666 + tmp_kernel_op_148*-0.16666666666666666);
+             const real_t elMatVec_3 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_151 + tmp_kernel_op_132*tmp_kernel_op_154 + tmp_kernel_op_133*tmp_kernel_op_157) + src_dof_1*(tmp_kernel_op_151*0.16666666666666666 + tmp_kernel_op_154*0.66666666666666663 + tmp_kernel_op_157*0.16666666666666666) + src_dof_2*(tmp_kernel_op_151*0.66666666666666663 + tmp_kernel_op_154*0.16666666666666666 + tmp_kernel_op_157*0.16666666666666666);
+             const real_t elMatVec_4 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_158 + tmp_kernel_op_132*tmp_kernel_op_159 + tmp_kernel_op_133*tmp_kernel_op_160) + src_dof_1*(tmp_kernel_op_158*0.16666666666666666 + tmp_kernel_op_159*0.66666666666666663 + tmp_kernel_op_160*0.16666666666666666) + src_dof_2*(tmp_kernel_op_158*0.66666666666666663 + tmp_kernel_op_159*0.16666666666666666 + tmp_kernel_op_160*0.16666666666666666);
+             const real_t elMatVec_5 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_161 + tmp_kernel_op_132*tmp_kernel_op_162 + tmp_kernel_op_133*tmp_kernel_op_163) + src_dof_1*(tmp_kernel_op_161*0.16666666666666666 + tmp_kernel_op_162*0.66666666666666663 + tmp_kernel_op_163*0.16666666666666666) + src_dof_2*(tmp_kernel_op_161*0.66666666666666663 + tmp_kernel_op_162*0.16666666666666666 + tmp_kernel_op_163*0.16666666666666666);
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..495f5ebd8d186a4bc17e54769c37596d8e103c09
--- /dev/null
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_macro_2D.cpp
@@ -0,0 +1,579 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P1ToP2ElementwiseGradientAnnulusMap_0_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P1ToP2ElementwiseGradientAnnulusMap_0_0::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_kernel_op_0 = 0.16666666666666674;
+       const real_t tmp_kernel_op_1 = 0.66666666666666663;
+       const real_t tmp_kernel_op_2 = 2.6666666666666665;
+       const real_t tmp_kernel_op_3 = tmp_kernel_op_1 + tmp_kernel_op_2 - 3.0;
+       const real_t tmp_kernel_op_4 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_kernel_op_5 = -tmp_kernel_op_4;
+       const real_t tmp_kernel_op_19 = rayVertex_0 - refVertex_0;
+       const real_t tmp_kernel_op_20 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_kernel_op_21 = -tmp_kernel_op_20;
+       const real_t tmp_kernel_op_22 = rayVertex_1 - refVertex_1;
+       const real_t tmp_kernel_op_23 = radRayVertex - radRefVertex;
+       const real_t tmp_kernel_op_24 = -tmp_kernel_op_23*1.0 / (-tmp_kernel_op_19*tmp_kernel_op_21 + tmp_kernel_op_22*tmp_kernel_op_5);
+       const real_t tmp_kernel_op_25 = tmp_kernel_op_24*1.0;
+       const real_t tmp_kernel_op_29 = -rayVertex_1;
+       const real_t tmp_kernel_op_30 = -rayVertex_0;
+       const real_t tmp_kernel_op_48 = tmp_kernel_op_23*1.0 / (tmp_kernel_op_19*tmp_kernel_op_20 - tmp_kernel_op_22*tmp_kernel_op_4);
+       const real_t tmp_kernel_op_49 = tmp_kernel_op_48*1.0;
+       const real_t tmp_kernel_op_60 = 0.16666666666666671;
+       const real_t tmp_kernel_op_61 = 2.6666666666666665;
+       const real_t tmp_kernel_op_62 = 0.66666666666666663;
+       const real_t tmp_kernel_op_63 = tmp_kernel_op_61 + tmp_kernel_op_62 - 3.0;
+       const real_t tmp_kernel_op_97 = 0.66666666666666674;
+       const real_t tmp_kernel_op_98 = 0.66666666666666663;
+       const real_t tmp_kernel_op_99 = 0.66666666666666663;
+       const real_t tmp_kernel_op_100 = tmp_kernel_op_98 + tmp_kernel_op_99 - 3.0;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t tmp_kernel_op_6 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_kernel_op_7 = -tmp_kernel_op_6;
+             const real_t tmp_kernel_op_8 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_kernel_op_9 = -tmp_kernel_op_8;
+             const real_t tmp_kernel_op_10 = p_affine_0_1 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_9*0.66666666666666663;
+             const real_t tmp_kernel_op_11 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_kernel_op_12 = -tmp_kernel_op_11;
+             const real_t tmp_kernel_op_13 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_kernel_op_14 = -tmp_kernel_op_13;
+             const real_t tmp_kernel_op_15 = p_affine_0_0 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_14*0.66666666666666663;
+             const real_t tmp_kernel_op_16 = (tmp_kernel_op_15*tmp_kernel_op_15);
+             const real_t tmp_kernel_op_17 = (tmp_kernel_op_10*tmp_kernel_op_10);
+             const real_t tmp_kernel_op_18 = tmp_kernel_op_16 + tmp_kernel_op_17;
+             const real_t tmp_kernel_op_26 = pow(tmp_kernel_op_18, -0.50000000000000000)*tmp_kernel_op_25;
+             const real_t tmp_kernel_op_27 = tmp_kernel_op_10*tmp_kernel_op_26;
+             const real_t tmp_kernel_op_28 = pow(tmp_kernel_op_18, -1.5000000000000000);
+             const real_t tmp_kernel_op_31 = radRayVertex + tmp_kernel_op_24*(tmp_kernel_op_21*(tmp_kernel_op_15 + tmp_kernel_op_30) - tmp_kernel_op_5*(tmp_kernel_op_10 + tmp_kernel_op_29));
+             const real_t tmp_kernel_op_32 = tmp_kernel_op_16*tmp_kernel_op_28*tmp_kernel_op_31*1.0 - tmp_kernel_op_27*tmp_kernel_op_5;
+             const real_t tmp_kernel_op_33 = tmp_kernel_op_15*tmp_kernel_op_26;
+             const real_t tmp_kernel_op_34 = tmp_kernel_op_28*tmp_kernel_op_31*1.0;
+             const real_t tmp_kernel_op_35 = -tmp_kernel_op_10*tmp_kernel_op_15*tmp_kernel_op_34 + tmp_kernel_op_21*tmp_kernel_op_27;
+             const real_t tmp_kernel_op_36 = 1.0 / (tmp_kernel_op_32*(tmp_kernel_op_17*tmp_kernel_op_34 + tmp_kernel_op_21*tmp_kernel_op_33) - tmp_kernel_op_35*(-tmp_kernel_op_10*tmp_kernel_op_15*tmp_kernel_op_28*tmp_kernel_op_31 - tmp_kernel_op_33*tmp_kernel_op_5));
+             const real_t tmp_kernel_op_37 = tmp_kernel_op_32*tmp_kernel_op_36;
+             const real_t tmp_kernel_op_38 = -tmp_kernel_op_35*tmp_kernel_op_36;
+             const real_t tmp_kernel_op_39 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_37 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_38;
+             const real_t tmp_kernel_op_40 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_37 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_38;
+             const real_t tmp_kernel_op_41 = -p_affine_0_0;
+             const real_t tmp_kernel_op_42 = tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.66666666666666663 + tmp_kernel_op_41;
+             const real_t tmp_kernel_op_43 = (tmp_kernel_op_42*tmp_kernel_op_42);
+             const real_t tmp_kernel_op_44 = -p_affine_0_1;
+             const real_t tmp_kernel_op_45 = tmp_kernel_op_44 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+             const real_t tmp_kernel_op_46 = (tmp_kernel_op_45*tmp_kernel_op_45);
+             const real_t tmp_kernel_op_47 = tmp_kernel_op_43 + tmp_kernel_op_46;
+             const real_t tmp_kernel_op_50 = pow(tmp_kernel_op_47, -0.50000000000000000)*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_51 = tmp_kernel_op_42*tmp_kernel_op_50;
+             const real_t tmp_kernel_op_52 = -tmp_kernel_op_20*(rayVertex_0 + tmp_kernel_op_42) + tmp_kernel_op_4*(rayVertex_1 + tmp_kernel_op_45);
+             const real_t tmp_kernel_op_53 = pow(tmp_kernel_op_47, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_54 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+             const real_t tmp_kernel_op_55 = tmp_kernel_op_45*tmp_kernel_op_50;
+             const real_t tmp_kernel_op_56 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+             const real_t tmp_kernel_op_57 = tmp_kernel_op_42*tmp_kernel_op_45;
+             const real_t tmp_kernel_op_58 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_20*tmp_kernel_op_51 - tmp_kernel_op_46*tmp_kernel_op_54)*(tmp_kernel_op_4*tmp_kernel_op_55 + tmp_kernel_op_43*tmp_kernel_op_56) - (tmp_kernel_op_20*tmp_kernel_op_55 + tmp_kernel_op_54*tmp_kernel_op_57)*(tmp_kernel_op_4*tmp_kernel_op_51 - tmp_kernel_op_56*tmp_kernel_op_57));
+             const real_t tmp_kernel_op_59 = tmp_kernel_op_58*(-tmp_kernel_op_3*tmp_kernel_op_39 - tmp_kernel_op_3*tmp_kernel_op_40);
+             const real_t tmp_kernel_op_64 = p_affine_0_1 + tmp_kernel_op_7*0.66666666666666663 + tmp_kernel_op_9*0.16666666666666666;
+             const real_t tmp_kernel_op_65 = p_affine_0_0 + tmp_kernel_op_12*0.66666666666666663 + tmp_kernel_op_14*0.16666666666666666;
+             const real_t tmp_kernel_op_66 = (tmp_kernel_op_65*tmp_kernel_op_65);
+             const real_t tmp_kernel_op_67 = (tmp_kernel_op_64*tmp_kernel_op_64);
+             const real_t tmp_kernel_op_68 = tmp_kernel_op_66 + tmp_kernel_op_67;
+             const real_t tmp_kernel_op_69 = tmp_kernel_op_25*pow(tmp_kernel_op_68, -0.50000000000000000);
+             const real_t tmp_kernel_op_70 = tmp_kernel_op_64*tmp_kernel_op_69;
+             const real_t tmp_kernel_op_71 = pow(tmp_kernel_op_68, -1.5000000000000000);
+             const real_t tmp_kernel_op_72 = radRayVertex + tmp_kernel_op_24*(tmp_kernel_op_21*(tmp_kernel_op_30 + tmp_kernel_op_65) - tmp_kernel_op_5*(tmp_kernel_op_29 + tmp_kernel_op_64));
+             const real_t tmp_kernel_op_73 = -tmp_kernel_op_5*tmp_kernel_op_70 + tmp_kernel_op_66*tmp_kernel_op_71*tmp_kernel_op_72*1.0;
+             const real_t tmp_kernel_op_74 = tmp_kernel_op_65*tmp_kernel_op_69;
+             const real_t tmp_kernel_op_75 = tmp_kernel_op_71*tmp_kernel_op_72*1.0;
+             const real_t tmp_kernel_op_76 = tmp_kernel_op_21*tmp_kernel_op_70 - tmp_kernel_op_64*tmp_kernel_op_65*tmp_kernel_op_75;
+             const real_t tmp_kernel_op_77 = 1.0 / (tmp_kernel_op_73*(tmp_kernel_op_21*tmp_kernel_op_74 + tmp_kernel_op_67*tmp_kernel_op_75) - tmp_kernel_op_76*(-tmp_kernel_op_5*tmp_kernel_op_74 - tmp_kernel_op_64*tmp_kernel_op_65*tmp_kernel_op_71*tmp_kernel_op_72));
+             const real_t tmp_kernel_op_78 = tmp_kernel_op_73*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_79 = -tmp_kernel_op_76*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_80 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_78 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_79;
+             const real_t tmp_kernel_op_81 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_78 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_79;
+             const real_t tmp_kernel_op_82 = tmp_kernel_op_11*0.66666666666666663 + tmp_kernel_op_13*0.16666666666666666 + tmp_kernel_op_41;
+             const real_t tmp_kernel_op_83 = (tmp_kernel_op_82*tmp_kernel_op_82);
+             const real_t tmp_kernel_op_84 = tmp_kernel_op_44 + tmp_kernel_op_6*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_85 = (tmp_kernel_op_84*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_86 = tmp_kernel_op_83 + tmp_kernel_op_85;
+             const real_t tmp_kernel_op_87 = tmp_kernel_op_49*pow(tmp_kernel_op_86, -0.50000000000000000);
+             const real_t tmp_kernel_op_88 = tmp_kernel_op_82*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_89 = -tmp_kernel_op_20*(rayVertex_0 + tmp_kernel_op_82) + tmp_kernel_op_4*(rayVertex_1 + tmp_kernel_op_84);
+             const real_t tmp_kernel_op_90 = pow(tmp_kernel_op_86, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_91 = tmp_kernel_op_90*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_89);
+             const real_t tmp_kernel_op_92 = tmp_kernel_op_84*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_93 = tmp_kernel_op_90*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_89);
+             const real_t tmp_kernel_op_94 = tmp_kernel_op_82*tmp_kernel_op_84;
+             const real_t tmp_kernel_op_95 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_20*tmp_kernel_op_88 - tmp_kernel_op_85*tmp_kernel_op_91)*(tmp_kernel_op_4*tmp_kernel_op_92 + tmp_kernel_op_83*tmp_kernel_op_93) - (tmp_kernel_op_20*tmp_kernel_op_92 + tmp_kernel_op_91*tmp_kernel_op_94)*(tmp_kernel_op_4*tmp_kernel_op_88 - tmp_kernel_op_93*tmp_kernel_op_94));
+             const real_t tmp_kernel_op_96 = tmp_kernel_op_95*(-tmp_kernel_op_63*tmp_kernel_op_80 - tmp_kernel_op_63*tmp_kernel_op_81);
+             const real_t tmp_kernel_op_101 = p_affine_0_1 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_9*0.16666666666666666;
+             const real_t tmp_kernel_op_102 = p_affine_0_0 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_14*0.16666666666666666;
+             const real_t tmp_kernel_op_103 = (tmp_kernel_op_102*tmp_kernel_op_102);
+             const real_t tmp_kernel_op_104 = (tmp_kernel_op_101*tmp_kernel_op_101);
+             const real_t tmp_kernel_op_105 = tmp_kernel_op_103 + tmp_kernel_op_104;
+             const real_t tmp_kernel_op_106 = pow(tmp_kernel_op_105, -0.50000000000000000)*tmp_kernel_op_25;
+             const real_t tmp_kernel_op_107 = tmp_kernel_op_101*tmp_kernel_op_106;
+             const real_t tmp_kernel_op_108 = pow(tmp_kernel_op_105, -1.5000000000000000);
+             const real_t tmp_kernel_op_109 = radRayVertex + tmp_kernel_op_24*(tmp_kernel_op_21*(tmp_kernel_op_102 + tmp_kernel_op_30) - tmp_kernel_op_5*(tmp_kernel_op_101 + tmp_kernel_op_29));
+             const real_t tmp_kernel_op_110 = tmp_kernel_op_103*tmp_kernel_op_108*tmp_kernel_op_109*1.0 - tmp_kernel_op_107*tmp_kernel_op_5;
+             const real_t tmp_kernel_op_111 = tmp_kernel_op_102*tmp_kernel_op_106;
+             const real_t tmp_kernel_op_112 = tmp_kernel_op_108*tmp_kernel_op_109*1.0;
+             const real_t tmp_kernel_op_113 = -tmp_kernel_op_101*tmp_kernel_op_102*tmp_kernel_op_112 + tmp_kernel_op_107*tmp_kernel_op_21;
+             const real_t tmp_kernel_op_114 = 1.0 / (tmp_kernel_op_110*(tmp_kernel_op_104*tmp_kernel_op_112 + tmp_kernel_op_111*tmp_kernel_op_21) - tmp_kernel_op_113*(-tmp_kernel_op_101*tmp_kernel_op_102*tmp_kernel_op_108*tmp_kernel_op_109 - tmp_kernel_op_111*tmp_kernel_op_5));
+             const real_t tmp_kernel_op_115 = tmp_kernel_op_110*tmp_kernel_op_114;
+             const real_t tmp_kernel_op_116 = -tmp_kernel_op_113*tmp_kernel_op_114;
+             const real_t tmp_kernel_op_117 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_115 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_116;
+             const real_t tmp_kernel_op_118 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_115 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_116;
+             const real_t tmp_kernel_op_119 = tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.16666666666666666 + tmp_kernel_op_41;
+             const real_t tmp_kernel_op_120 = (tmp_kernel_op_119*tmp_kernel_op_119);
+             const real_t tmp_kernel_op_121 = tmp_kernel_op_44 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_122 = (tmp_kernel_op_121*tmp_kernel_op_121);
+             const real_t tmp_kernel_op_123 = tmp_kernel_op_120 + tmp_kernel_op_122;
+             const real_t tmp_kernel_op_124 = pow(tmp_kernel_op_123, -0.50000000000000000)*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_125 = tmp_kernel_op_119*tmp_kernel_op_124;
+             const real_t tmp_kernel_op_126 = -tmp_kernel_op_20*(rayVertex_0 + tmp_kernel_op_119) + tmp_kernel_op_4*(rayVertex_1 + tmp_kernel_op_121);
+             const real_t tmp_kernel_op_127 = pow(tmp_kernel_op_123, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_128 = tmp_kernel_op_127*(radRayVertex + tmp_kernel_op_126*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_129 = tmp_kernel_op_121*tmp_kernel_op_124;
+             const real_t tmp_kernel_op_130 = tmp_kernel_op_127*(radRayVertex + tmp_kernel_op_126*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_131 = tmp_kernel_op_119*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_132 = abs_det_jac_affine_GRAY*0.16666666666666666*abs(-(tmp_kernel_op_120*tmp_kernel_op_130 + tmp_kernel_op_129*tmp_kernel_op_4)*(-tmp_kernel_op_122*tmp_kernel_op_128 + tmp_kernel_op_125*tmp_kernel_op_20) + (tmp_kernel_op_125*tmp_kernel_op_4 - tmp_kernel_op_130*tmp_kernel_op_131)*(tmp_kernel_op_128*tmp_kernel_op_131 + tmp_kernel_op_129*tmp_kernel_op_20));
+             const real_t tmp_kernel_op_133 = tmp_kernel_op_132*(-tmp_kernel_op_100*tmp_kernel_op_117 - tmp_kernel_op_100*tmp_kernel_op_118);
+             const real_t tmp_kernel_op_134 = tmp_kernel_op_0*tmp_kernel_op_58;
+             const real_t tmp_kernel_op_135 = tmp_kernel_op_39*(tmp_kernel_op_1 - 1.0);
+             const real_t tmp_kernel_op_136 = tmp_kernel_op_60*tmp_kernel_op_95;
+             const real_t tmp_kernel_op_137 = tmp_kernel_op_80*(tmp_kernel_op_61 - 1.0);
+             const real_t tmp_kernel_op_138 = tmp_kernel_op_132*tmp_kernel_op_97;
+             const real_t tmp_kernel_op_139 = tmp_kernel_op_117*(tmp_kernel_op_98 - 1.0);
+             const real_t tmp_kernel_op_140 = tmp_kernel_op_135*tmp_kernel_op_58;
+             const real_t tmp_kernel_op_141 = tmp_kernel_op_137*tmp_kernel_op_95;
+             const real_t tmp_kernel_op_142 = tmp_kernel_op_132*tmp_kernel_op_139;
+             const real_t tmp_kernel_op_143 = tmp_kernel_op_40*(tmp_kernel_op_2 - 1.0);
+             const real_t tmp_kernel_op_144 = tmp_kernel_op_81*(tmp_kernel_op_62 - 1.0);
+             const real_t tmp_kernel_op_145 = tmp_kernel_op_118*(tmp_kernel_op_99 - 1.0);
+             const real_t tmp_kernel_op_146 = tmp_kernel_op_143*tmp_kernel_op_58;
+             const real_t tmp_kernel_op_147 = tmp_kernel_op_144*tmp_kernel_op_95;
+             const real_t tmp_kernel_op_148 = tmp_kernel_op_132*tmp_kernel_op_145;
+             const real_t tmp_kernel_op_149 = tmp_kernel_op_1*tmp_kernel_op_40;
+             const real_t tmp_kernel_op_150 = tmp_kernel_op_2*tmp_kernel_op_39;
+             const real_t tmp_kernel_op_151 = tmp_kernel_op_58*(-tmp_kernel_op_149 - tmp_kernel_op_150);
+             const real_t tmp_kernel_op_152 = tmp_kernel_op_61*tmp_kernel_op_81;
+             const real_t tmp_kernel_op_153 = tmp_kernel_op_62*tmp_kernel_op_80;
+             const real_t tmp_kernel_op_154 = tmp_kernel_op_95*(-tmp_kernel_op_152 - tmp_kernel_op_153);
+             const real_t tmp_kernel_op_155 = tmp_kernel_op_118*tmp_kernel_op_98;
+             const real_t tmp_kernel_op_156 = tmp_kernel_op_117*tmp_kernel_op_99;
+             const real_t tmp_kernel_op_157 = tmp_kernel_op_132*(-tmp_kernel_op_155 - tmp_kernel_op_156);
+             const real_t tmp_kernel_op_158 = tmp_kernel_op_58*(tmp_kernel_op_150 - tmp_kernel_op_40*(-tmp_kernel_op_1 - 1.333333333333333));
+             const real_t tmp_kernel_op_159 = tmp_kernel_op_95*(tmp_kernel_op_153 - tmp_kernel_op_81*(-tmp_kernel_op_61 + 2.666666666666667));
+             const real_t tmp_kernel_op_160 = tmp_kernel_op_132*(-tmp_kernel_op_118*(-tmp_kernel_op_98 + 2.666666666666667) + tmp_kernel_op_156);
+             const real_t tmp_kernel_op_161 = tmp_kernel_op_58*(tmp_kernel_op_149 - tmp_kernel_op_39*(-tmp_kernel_op_2 + 2.666666666666667));
+             const real_t tmp_kernel_op_162 = tmp_kernel_op_95*(tmp_kernel_op_152 - tmp_kernel_op_80*(-tmp_kernel_op_62 - 1.333333333333333));
+             const real_t tmp_kernel_op_163 = tmp_kernel_op_132*(-tmp_kernel_op_117*(-tmp_kernel_op_99 + 2.666666666666667) + tmp_kernel_op_155);
+             const real_t elMat_0_0 = tmp_kernel_op_0*tmp_kernel_op_59 + tmp_kernel_op_133*tmp_kernel_op_97 + tmp_kernel_op_60*tmp_kernel_op_96;
+             const real_t elMat_0_1 = tmp_kernel_op_133*0.16666666666666666 + tmp_kernel_op_59*0.16666666666666666 + tmp_kernel_op_96*0.66666666666666663;
+             const real_t elMat_0_2 = tmp_kernel_op_133*0.16666666666666666 + tmp_kernel_op_59*0.66666666666666663 + tmp_kernel_op_96*0.16666666666666666;
+             const real_t elMat_1_0 = -tmp_kernel_op_134*tmp_kernel_op_135 - tmp_kernel_op_136*tmp_kernel_op_137 - tmp_kernel_op_138*tmp_kernel_op_139;
+             const real_t elMat_1_1 = tmp_kernel_op_140*-0.16666666666666666 + tmp_kernel_op_141*-0.66666666666666663 + tmp_kernel_op_142*-0.16666666666666666;
+             const real_t elMat_1_2 = tmp_kernel_op_140*-0.66666666666666663 + tmp_kernel_op_141*-0.16666666666666666 + tmp_kernel_op_142*-0.16666666666666666;
+             const real_t elMat_2_0 = -tmp_kernel_op_134*tmp_kernel_op_143 - tmp_kernel_op_136*tmp_kernel_op_144 - tmp_kernel_op_138*tmp_kernel_op_145;
+             const real_t elMat_2_1 = tmp_kernel_op_146*-0.16666666666666666 + tmp_kernel_op_147*-0.66666666666666663 + tmp_kernel_op_148*-0.16666666666666666;
+             const real_t elMat_2_2 = tmp_kernel_op_146*-0.66666666666666663 + tmp_kernel_op_147*-0.16666666666666666 + tmp_kernel_op_148*-0.16666666666666666;
+             const real_t elMat_3_0 = tmp_kernel_op_0*tmp_kernel_op_151 + tmp_kernel_op_154*tmp_kernel_op_60 + tmp_kernel_op_157*tmp_kernel_op_97;
+             const real_t elMat_3_1 = tmp_kernel_op_151*0.16666666666666666 + tmp_kernel_op_154*0.66666666666666663 + tmp_kernel_op_157*0.16666666666666666;
+             const real_t elMat_3_2 = tmp_kernel_op_151*0.66666666666666663 + tmp_kernel_op_154*0.16666666666666666 + tmp_kernel_op_157*0.16666666666666666;
+             const real_t elMat_4_0 = tmp_kernel_op_0*tmp_kernel_op_158 + tmp_kernel_op_159*tmp_kernel_op_60 + tmp_kernel_op_160*tmp_kernel_op_97;
+             const real_t elMat_4_1 = tmp_kernel_op_158*0.16666666666666666 + tmp_kernel_op_159*0.66666666666666663 + tmp_kernel_op_160*0.16666666666666666;
+             const real_t elMat_4_2 = tmp_kernel_op_158*0.66666666666666663 + tmp_kernel_op_159*0.16666666666666666 + tmp_kernel_op_160*0.16666666666666666;
+             const real_t elMat_5_0 = tmp_kernel_op_0*tmp_kernel_op_161 + tmp_kernel_op_162*tmp_kernel_op_60 + tmp_kernel_op_163*tmp_kernel_op_97;
+             const real_t elMat_5_1 = tmp_kernel_op_161*0.16666666666666666 + tmp_kernel_op_162*0.66666666666666663 + tmp_kernel_op_163*0.16666666666666666;
+             const real_t elMat_5_2 = tmp_kernel_op_161*0.66666666666666663 + tmp_kernel_op_162*0.16666666666666666 + tmp_kernel_op_163*0.16666666666666666;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 3 );
+             std::vector< real_t > _data_mat( 18 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[1] = ((uint64_t)(_data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[2] = ((uint64_t)(_data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_1_0));
+             _data_mat[4] = ((real_t)(elMat_1_1));
+             _data_mat[5] = ((real_t)(elMat_1_2));
+             _data_mat[6] = ((real_t)(elMat_2_0));
+             _data_mat[7] = ((real_t)(elMat_2_1));
+             _data_mat[8] = ((real_t)(elMat_2_2));
+             _data_mat[9] = ((real_t)(elMat_3_0));
+             _data_mat[10] = ((real_t)(elMat_3_1));
+             _data_mat[11] = ((real_t)(elMat_3_2));
+             _data_mat[12] = ((real_t)(elMat_4_0));
+             _data_mat[13] = ((real_t)(elMat_4_1));
+             _data_mat[14] = ((real_t)(elMat_4_2));
+             _data_mat[15] = ((real_t)(elMat_5_0));
+             _data_mat[16] = ((real_t)(elMat_5_1));
+             _data_mat[17] = ((real_t)(elMat_5_2));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t tmp_kernel_op_6 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_kernel_op_7 = -tmp_kernel_op_6;
+             const real_t tmp_kernel_op_8 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_kernel_op_9 = -tmp_kernel_op_8;
+             const real_t tmp_kernel_op_10 = p_affine_0_1 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_9*0.66666666666666663;
+             const real_t tmp_kernel_op_11 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_kernel_op_12 = -tmp_kernel_op_11;
+             const real_t tmp_kernel_op_13 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_kernel_op_14 = -tmp_kernel_op_13;
+             const real_t tmp_kernel_op_15 = p_affine_0_0 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_14*0.66666666666666663;
+             const real_t tmp_kernel_op_16 = (tmp_kernel_op_15*tmp_kernel_op_15);
+             const real_t tmp_kernel_op_17 = (tmp_kernel_op_10*tmp_kernel_op_10);
+             const real_t tmp_kernel_op_18 = tmp_kernel_op_16 + tmp_kernel_op_17;
+             const real_t tmp_kernel_op_26 = pow(tmp_kernel_op_18, -0.50000000000000000)*tmp_kernel_op_25;
+             const real_t tmp_kernel_op_27 = tmp_kernel_op_10*tmp_kernel_op_26;
+             const real_t tmp_kernel_op_28 = pow(tmp_kernel_op_18, -1.5000000000000000);
+             const real_t tmp_kernel_op_31 = radRayVertex + tmp_kernel_op_24*(tmp_kernel_op_21*(tmp_kernel_op_15 + tmp_kernel_op_30) - tmp_kernel_op_5*(tmp_kernel_op_10 + tmp_kernel_op_29));
+             const real_t tmp_kernel_op_32 = tmp_kernel_op_16*tmp_kernel_op_28*tmp_kernel_op_31*1.0 - tmp_kernel_op_27*tmp_kernel_op_5;
+             const real_t tmp_kernel_op_33 = tmp_kernel_op_15*tmp_kernel_op_26;
+             const real_t tmp_kernel_op_34 = tmp_kernel_op_28*tmp_kernel_op_31*1.0;
+             const real_t tmp_kernel_op_35 = -tmp_kernel_op_10*tmp_kernel_op_15*tmp_kernel_op_34 + tmp_kernel_op_21*tmp_kernel_op_27;
+             const real_t tmp_kernel_op_36 = 1.0 / (tmp_kernel_op_32*(tmp_kernel_op_17*tmp_kernel_op_34 + tmp_kernel_op_21*tmp_kernel_op_33) - tmp_kernel_op_35*(-tmp_kernel_op_10*tmp_kernel_op_15*tmp_kernel_op_28*tmp_kernel_op_31 - tmp_kernel_op_33*tmp_kernel_op_5));
+             const real_t tmp_kernel_op_37 = tmp_kernel_op_32*tmp_kernel_op_36;
+             const real_t tmp_kernel_op_38 = -tmp_kernel_op_35*tmp_kernel_op_36;
+             const real_t tmp_kernel_op_39 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_37 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_38;
+             const real_t tmp_kernel_op_40 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_37 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_38;
+             const real_t tmp_kernel_op_41 = -p_affine_0_0;
+             const real_t tmp_kernel_op_42 = tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.66666666666666663 + tmp_kernel_op_41;
+             const real_t tmp_kernel_op_43 = (tmp_kernel_op_42*tmp_kernel_op_42);
+             const real_t tmp_kernel_op_44 = -p_affine_0_1;
+             const real_t tmp_kernel_op_45 = tmp_kernel_op_44 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+             const real_t tmp_kernel_op_46 = (tmp_kernel_op_45*tmp_kernel_op_45);
+             const real_t tmp_kernel_op_47 = tmp_kernel_op_43 + tmp_kernel_op_46;
+             const real_t tmp_kernel_op_50 = pow(tmp_kernel_op_47, -0.50000000000000000)*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_51 = tmp_kernel_op_42*tmp_kernel_op_50;
+             const real_t tmp_kernel_op_52 = -tmp_kernel_op_20*(rayVertex_0 + tmp_kernel_op_42) + tmp_kernel_op_4*(rayVertex_1 + tmp_kernel_op_45);
+             const real_t tmp_kernel_op_53 = pow(tmp_kernel_op_47, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_54 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+             const real_t tmp_kernel_op_55 = tmp_kernel_op_45*tmp_kernel_op_50;
+             const real_t tmp_kernel_op_56 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+             const real_t tmp_kernel_op_57 = tmp_kernel_op_42*tmp_kernel_op_45;
+             const real_t tmp_kernel_op_58 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_20*tmp_kernel_op_51 - tmp_kernel_op_46*tmp_kernel_op_54)*(tmp_kernel_op_4*tmp_kernel_op_55 + tmp_kernel_op_43*tmp_kernel_op_56) - (tmp_kernel_op_20*tmp_kernel_op_55 + tmp_kernel_op_54*tmp_kernel_op_57)*(tmp_kernel_op_4*tmp_kernel_op_51 - tmp_kernel_op_56*tmp_kernel_op_57));
+             const real_t tmp_kernel_op_59 = tmp_kernel_op_58*(-tmp_kernel_op_3*tmp_kernel_op_39 - tmp_kernel_op_3*tmp_kernel_op_40);
+             const real_t tmp_kernel_op_64 = p_affine_0_1 + tmp_kernel_op_7*0.66666666666666663 + tmp_kernel_op_9*0.16666666666666666;
+             const real_t tmp_kernel_op_65 = p_affine_0_0 + tmp_kernel_op_12*0.66666666666666663 + tmp_kernel_op_14*0.16666666666666666;
+             const real_t tmp_kernel_op_66 = (tmp_kernel_op_65*tmp_kernel_op_65);
+             const real_t tmp_kernel_op_67 = (tmp_kernel_op_64*tmp_kernel_op_64);
+             const real_t tmp_kernel_op_68 = tmp_kernel_op_66 + tmp_kernel_op_67;
+             const real_t tmp_kernel_op_69 = tmp_kernel_op_25*pow(tmp_kernel_op_68, -0.50000000000000000);
+             const real_t tmp_kernel_op_70 = tmp_kernel_op_64*tmp_kernel_op_69;
+             const real_t tmp_kernel_op_71 = pow(tmp_kernel_op_68, -1.5000000000000000);
+             const real_t tmp_kernel_op_72 = radRayVertex + tmp_kernel_op_24*(tmp_kernel_op_21*(tmp_kernel_op_30 + tmp_kernel_op_65) - tmp_kernel_op_5*(tmp_kernel_op_29 + tmp_kernel_op_64));
+             const real_t tmp_kernel_op_73 = -tmp_kernel_op_5*tmp_kernel_op_70 + tmp_kernel_op_66*tmp_kernel_op_71*tmp_kernel_op_72*1.0;
+             const real_t tmp_kernel_op_74 = tmp_kernel_op_65*tmp_kernel_op_69;
+             const real_t tmp_kernel_op_75 = tmp_kernel_op_71*tmp_kernel_op_72*1.0;
+             const real_t tmp_kernel_op_76 = tmp_kernel_op_21*tmp_kernel_op_70 - tmp_kernel_op_64*tmp_kernel_op_65*tmp_kernel_op_75;
+             const real_t tmp_kernel_op_77 = 1.0 / (tmp_kernel_op_73*(tmp_kernel_op_21*tmp_kernel_op_74 + tmp_kernel_op_67*tmp_kernel_op_75) - tmp_kernel_op_76*(-tmp_kernel_op_5*tmp_kernel_op_74 - tmp_kernel_op_64*tmp_kernel_op_65*tmp_kernel_op_71*tmp_kernel_op_72));
+             const real_t tmp_kernel_op_78 = tmp_kernel_op_73*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_79 = -tmp_kernel_op_76*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_80 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_78 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_79;
+             const real_t tmp_kernel_op_81 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_78 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_79;
+             const real_t tmp_kernel_op_82 = tmp_kernel_op_11*0.66666666666666663 + tmp_kernel_op_13*0.16666666666666666 + tmp_kernel_op_41;
+             const real_t tmp_kernel_op_83 = (tmp_kernel_op_82*tmp_kernel_op_82);
+             const real_t tmp_kernel_op_84 = tmp_kernel_op_44 + tmp_kernel_op_6*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_85 = (tmp_kernel_op_84*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_86 = tmp_kernel_op_83 + tmp_kernel_op_85;
+             const real_t tmp_kernel_op_87 = tmp_kernel_op_49*pow(tmp_kernel_op_86, -0.50000000000000000);
+             const real_t tmp_kernel_op_88 = tmp_kernel_op_82*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_89 = -tmp_kernel_op_20*(rayVertex_0 + tmp_kernel_op_82) + tmp_kernel_op_4*(rayVertex_1 + tmp_kernel_op_84);
+             const real_t tmp_kernel_op_90 = pow(tmp_kernel_op_86, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_91 = tmp_kernel_op_90*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_89);
+             const real_t tmp_kernel_op_92 = tmp_kernel_op_84*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_93 = tmp_kernel_op_90*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_89);
+             const real_t tmp_kernel_op_94 = tmp_kernel_op_82*tmp_kernel_op_84;
+             const real_t tmp_kernel_op_95 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_20*tmp_kernel_op_88 - tmp_kernel_op_85*tmp_kernel_op_91)*(tmp_kernel_op_4*tmp_kernel_op_92 + tmp_kernel_op_83*tmp_kernel_op_93) - (tmp_kernel_op_20*tmp_kernel_op_92 + tmp_kernel_op_91*tmp_kernel_op_94)*(tmp_kernel_op_4*tmp_kernel_op_88 - tmp_kernel_op_93*tmp_kernel_op_94));
+             const real_t tmp_kernel_op_96 = tmp_kernel_op_95*(-tmp_kernel_op_63*tmp_kernel_op_80 - tmp_kernel_op_63*tmp_kernel_op_81);
+             const real_t tmp_kernel_op_101 = p_affine_0_1 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_9*0.16666666666666666;
+             const real_t tmp_kernel_op_102 = p_affine_0_0 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_14*0.16666666666666666;
+             const real_t tmp_kernel_op_103 = (tmp_kernel_op_102*tmp_kernel_op_102);
+             const real_t tmp_kernel_op_104 = (tmp_kernel_op_101*tmp_kernel_op_101);
+             const real_t tmp_kernel_op_105 = tmp_kernel_op_103 + tmp_kernel_op_104;
+             const real_t tmp_kernel_op_106 = pow(tmp_kernel_op_105, -0.50000000000000000)*tmp_kernel_op_25;
+             const real_t tmp_kernel_op_107 = tmp_kernel_op_101*tmp_kernel_op_106;
+             const real_t tmp_kernel_op_108 = pow(tmp_kernel_op_105, -1.5000000000000000);
+             const real_t tmp_kernel_op_109 = radRayVertex + tmp_kernel_op_24*(tmp_kernel_op_21*(tmp_kernel_op_102 + tmp_kernel_op_30) - tmp_kernel_op_5*(tmp_kernel_op_101 + tmp_kernel_op_29));
+             const real_t tmp_kernel_op_110 = tmp_kernel_op_103*tmp_kernel_op_108*tmp_kernel_op_109*1.0 - tmp_kernel_op_107*tmp_kernel_op_5;
+             const real_t tmp_kernel_op_111 = tmp_kernel_op_102*tmp_kernel_op_106;
+             const real_t tmp_kernel_op_112 = tmp_kernel_op_108*tmp_kernel_op_109*1.0;
+             const real_t tmp_kernel_op_113 = -tmp_kernel_op_101*tmp_kernel_op_102*tmp_kernel_op_112 + tmp_kernel_op_107*tmp_kernel_op_21;
+             const real_t tmp_kernel_op_114 = 1.0 / (tmp_kernel_op_110*(tmp_kernel_op_104*tmp_kernel_op_112 + tmp_kernel_op_111*tmp_kernel_op_21) - tmp_kernel_op_113*(-tmp_kernel_op_101*tmp_kernel_op_102*tmp_kernel_op_108*tmp_kernel_op_109 - tmp_kernel_op_111*tmp_kernel_op_5));
+             const real_t tmp_kernel_op_115 = tmp_kernel_op_110*tmp_kernel_op_114;
+             const real_t tmp_kernel_op_116 = -tmp_kernel_op_113*tmp_kernel_op_114;
+             const real_t tmp_kernel_op_117 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_115 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_116;
+             const real_t tmp_kernel_op_118 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_115 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_116;
+             const real_t tmp_kernel_op_119 = tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.16666666666666666 + tmp_kernel_op_41;
+             const real_t tmp_kernel_op_120 = (tmp_kernel_op_119*tmp_kernel_op_119);
+             const real_t tmp_kernel_op_121 = tmp_kernel_op_44 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_122 = (tmp_kernel_op_121*tmp_kernel_op_121);
+             const real_t tmp_kernel_op_123 = tmp_kernel_op_120 + tmp_kernel_op_122;
+             const real_t tmp_kernel_op_124 = pow(tmp_kernel_op_123, -0.50000000000000000)*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_125 = tmp_kernel_op_119*tmp_kernel_op_124;
+             const real_t tmp_kernel_op_126 = -tmp_kernel_op_20*(rayVertex_0 + tmp_kernel_op_119) + tmp_kernel_op_4*(rayVertex_1 + tmp_kernel_op_121);
+             const real_t tmp_kernel_op_127 = pow(tmp_kernel_op_123, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_128 = tmp_kernel_op_127*(radRayVertex + tmp_kernel_op_126*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_129 = tmp_kernel_op_121*tmp_kernel_op_124;
+             const real_t tmp_kernel_op_130 = tmp_kernel_op_127*(radRayVertex + tmp_kernel_op_126*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_131 = tmp_kernel_op_119*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_132 = abs_det_jac_affine_BLUE*0.16666666666666666*abs(-(tmp_kernel_op_120*tmp_kernel_op_130 + tmp_kernel_op_129*tmp_kernel_op_4)*(-tmp_kernel_op_122*tmp_kernel_op_128 + tmp_kernel_op_125*tmp_kernel_op_20) + (tmp_kernel_op_125*tmp_kernel_op_4 - tmp_kernel_op_130*tmp_kernel_op_131)*(tmp_kernel_op_128*tmp_kernel_op_131 + tmp_kernel_op_129*tmp_kernel_op_20));
+             const real_t tmp_kernel_op_133 = tmp_kernel_op_132*(-tmp_kernel_op_100*tmp_kernel_op_117 - tmp_kernel_op_100*tmp_kernel_op_118);
+             const real_t tmp_kernel_op_134 = tmp_kernel_op_0*tmp_kernel_op_58;
+             const real_t tmp_kernel_op_135 = tmp_kernel_op_39*(tmp_kernel_op_1 - 1.0);
+             const real_t tmp_kernel_op_136 = tmp_kernel_op_60*tmp_kernel_op_95;
+             const real_t tmp_kernel_op_137 = tmp_kernel_op_80*(tmp_kernel_op_61 - 1.0);
+             const real_t tmp_kernel_op_138 = tmp_kernel_op_132*tmp_kernel_op_97;
+             const real_t tmp_kernel_op_139 = tmp_kernel_op_117*(tmp_kernel_op_98 - 1.0);
+             const real_t tmp_kernel_op_140 = tmp_kernel_op_135*tmp_kernel_op_58;
+             const real_t tmp_kernel_op_141 = tmp_kernel_op_137*tmp_kernel_op_95;
+             const real_t tmp_kernel_op_142 = tmp_kernel_op_132*tmp_kernel_op_139;
+             const real_t tmp_kernel_op_143 = tmp_kernel_op_40*(tmp_kernel_op_2 - 1.0);
+             const real_t tmp_kernel_op_144 = tmp_kernel_op_81*(tmp_kernel_op_62 - 1.0);
+             const real_t tmp_kernel_op_145 = tmp_kernel_op_118*(tmp_kernel_op_99 - 1.0);
+             const real_t tmp_kernel_op_146 = tmp_kernel_op_143*tmp_kernel_op_58;
+             const real_t tmp_kernel_op_147 = tmp_kernel_op_144*tmp_kernel_op_95;
+             const real_t tmp_kernel_op_148 = tmp_kernel_op_132*tmp_kernel_op_145;
+             const real_t tmp_kernel_op_149 = tmp_kernel_op_1*tmp_kernel_op_40;
+             const real_t tmp_kernel_op_150 = tmp_kernel_op_2*tmp_kernel_op_39;
+             const real_t tmp_kernel_op_151 = tmp_kernel_op_58*(-tmp_kernel_op_149 - tmp_kernel_op_150);
+             const real_t tmp_kernel_op_152 = tmp_kernel_op_61*tmp_kernel_op_81;
+             const real_t tmp_kernel_op_153 = tmp_kernel_op_62*tmp_kernel_op_80;
+             const real_t tmp_kernel_op_154 = tmp_kernel_op_95*(-tmp_kernel_op_152 - tmp_kernel_op_153);
+             const real_t tmp_kernel_op_155 = tmp_kernel_op_118*tmp_kernel_op_98;
+             const real_t tmp_kernel_op_156 = tmp_kernel_op_117*tmp_kernel_op_99;
+             const real_t tmp_kernel_op_157 = tmp_kernel_op_132*(-tmp_kernel_op_155 - tmp_kernel_op_156);
+             const real_t tmp_kernel_op_158 = tmp_kernel_op_58*(tmp_kernel_op_150 - tmp_kernel_op_40*(-tmp_kernel_op_1 - 1.333333333333333));
+             const real_t tmp_kernel_op_159 = tmp_kernel_op_95*(tmp_kernel_op_153 - tmp_kernel_op_81*(-tmp_kernel_op_61 + 2.666666666666667));
+             const real_t tmp_kernel_op_160 = tmp_kernel_op_132*(-tmp_kernel_op_118*(-tmp_kernel_op_98 + 2.666666666666667) + tmp_kernel_op_156);
+             const real_t tmp_kernel_op_161 = tmp_kernel_op_58*(tmp_kernel_op_149 - tmp_kernel_op_39*(-tmp_kernel_op_2 + 2.666666666666667));
+             const real_t tmp_kernel_op_162 = tmp_kernel_op_95*(tmp_kernel_op_152 - tmp_kernel_op_80*(-tmp_kernel_op_62 - 1.333333333333333));
+             const real_t tmp_kernel_op_163 = tmp_kernel_op_132*(-tmp_kernel_op_117*(-tmp_kernel_op_99 + 2.666666666666667) + tmp_kernel_op_155);
+             const real_t elMat_0_0 = tmp_kernel_op_0*tmp_kernel_op_59 + tmp_kernel_op_133*tmp_kernel_op_97 + tmp_kernel_op_60*tmp_kernel_op_96;
+             const real_t elMat_0_1 = tmp_kernel_op_133*0.16666666666666666 + tmp_kernel_op_59*0.16666666666666666 + tmp_kernel_op_96*0.66666666666666663;
+             const real_t elMat_0_2 = tmp_kernel_op_133*0.16666666666666666 + tmp_kernel_op_59*0.66666666666666663 + tmp_kernel_op_96*0.16666666666666666;
+             const real_t elMat_1_0 = -tmp_kernel_op_134*tmp_kernel_op_135 - tmp_kernel_op_136*tmp_kernel_op_137 - tmp_kernel_op_138*tmp_kernel_op_139;
+             const real_t elMat_1_1 = tmp_kernel_op_140*-0.16666666666666666 + tmp_kernel_op_141*-0.66666666666666663 + tmp_kernel_op_142*-0.16666666666666666;
+             const real_t elMat_1_2 = tmp_kernel_op_140*-0.66666666666666663 + tmp_kernel_op_141*-0.16666666666666666 + tmp_kernel_op_142*-0.16666666666666666;
+             const real_t elMat_2_0 = -tmp_kernel_op_134*tmp_kernel_op_143 - tmp_kernel_op_136*tmp_kernel_op_144 - tmp_kernel_op_138*tmp_kernel_op_145;
+             const real_t elMat_2_1 = tmp_kernel_op_146*-0.16666666666666666 + tmp_kernel_op_147*-0.66666666666666663 + tmp_kernel_op_148*-0.16666666666666666;
+             const real_t elMat_2_2 = tmp_kernel_op_146*-0.66666666666666663 + tmp_kernel_op_147*-0.16666666666666666 + tmp_kernel_op_148*-0.16666666666666666;
+             const real_t elMat_3_0 = tmp_kernel_op_0*tmp_kernel_op_151 + tmp_kernel_op_154*tmp_kernel_op_60 + tmp_kernel_op_157*tmp_kernel_op_97;
+             const real_t elMat_3_1 = tmp_kernel_op_151*0.16666666666666666 + tmp_kernel_op_154*0.66666666666666663 + tmp_kernel_op_157*0.16666666666666666;
+             const real_t elMat_3_2 = tmp_kernel_op_151*0.66666666666666663 + tmp_kernel_op_154*0.16666666666666666 + tmp_kernel_op_157*0.16666666666666666;
+             const real_t elMat_4_0 = tmp_kernel_op_0*tmp_kernel_op_158 + tmp_kernel_op_159*tmp_kernel_op_60 + tmp_kernel_op_160*tmp_kernel_op_97;
+             const real_t elMat_4_1 = tmp_kernel_op_158*0.16666666666666666 + tmp_kernel_op_159*0.66666666666666663 + tmp_kernel_op_160*0.16666666666666666;
+             const real_t elMat_4_2 = tmp_kernel_op_158*0.66666666666666663 + tmp_kernel_op_159*0.16666666666666666 + tmp_kernel_op_160*0.16666666666666666;
+             const real_t elMat_5_0 = tmp_kernel_op_0*tmp_kernel_op_161 + tmp_kernel_op_162*tmp_kernel_op_60 + tmp_kernel_op_163*tmp_kernel_op_97;
+             const real_t elMat_5_1 = tmp_kernel_op_161*0.16666666666666666 + tmp_kernel_op_162*0.66666666666666663 + tmp_kernel_op_163*0.16666666666666666;
+             const real_t elMat_5_2 = tmp_kernel_op_161*0.66666666666666663 + tmp_kernel_op_162*0.16666666666666666 + tmp_kernel_op_163*0.16666666666666666;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 3 );
+             std::vector< real_t > _data_mat( 18 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[1] = ((uint64_t)(_data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[2] = ((uint64_t)(_data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_1_0));
+             _data_mat[4] = ((real_t)(elMat_1_1));
+             _data_mat[5] = ((real_t)(elMat_1_2));
+             _data_mat[6] = ((real_t)(elMat_2_0));
+             _data_mat[7] = ((real_t)(elMat_2_1));
+             _data_mat[8] = ((real_t)(elMat_2_2));
+             _data_mat[9] = ((real_t)(elMat_3_0));
+             _data_mat[10] = ((real_t)(elMat_3_1));
+             _data_mat[11] = ((real_t)(elMat_3_2));
+             _data_mat[12] = ((real_t)(elMat_4_0));
+             _data_mat[13] = ((real_t)(elMat_4_1));
+             _data_mat[14] = ((real_t)(elMat_4_2));
+             _data_mat[15] = ((real_t)(elMat_5_0));
+             _data_mat[16] = ((real_t)(elMat_5_1));
+             _data_mat[17] = ((real_t)(elMat_5_2));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bcefeff96db904e95eff98249e3fb3a9fd53bc18
--- /dev/null
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp
@@ -0,0 +1,493 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P1ToP2ElementwiseGradientAnnulusMap_1_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P1ToP2ElementwiseGradientAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT  _data_dstEdge, real_t * RESTRICT  _data_dstVertex, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_kernel_op_0 = 0.66666666666666663;
+       const real_t tmp_kernel_op_1 = 2.6666666666666665;
+       const real_t tmp_kernel_op_2 = tmp_kernel_op_0 + tmp_kernel_op_1 - 3.0;
+       const real_t tmp_kernel_op_3 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_kernel_op_4 = -tmp_kernel_op_3;
+       const real_t tmp_kernel_op_18 = rayVertex_0 - refVertex_0;
+       const real_t tmp_kernel_op_19 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_kernel_op_20 = -tmp_kernel_op_19;
+       const real_t tmp_kernel_op_21 = rayVertex_1 - refVertex_1;
+       const real_t tmp_kernel_op_22 = radRayVertex - radRefVertex;
+       const real_t tmp_kernel_op_23 = -tmp_kernel_op_22*1.0 / (-tmp_kernel_op_18*tmp_kernel_op_4 + tmp_kernel_op_20*tmp_kernel_op_21);
+       const real_t tmp_kernel_op_24 = tmp_kernel_op_23*1.0;
+       const real_t tmp_kernel_op_28 = -rayVertex_1;
+       const real_t tmp_kernel_op_29 = -rayVertex_0;
+       const real_t tmp_kernel_op_47 = tmp_kernel_op_22*1.0 / (tmp_kernel_op_18*tmp_kernel_op_3 - tmp_kernel_op_19*tmp_kernel_op_21);
+       const real_t tmp_kernel_op_48 = tmp_kernel_op_47*1.0;
+       const real_t tmp_kernel_op_59 = 2.6666666666666665;
+       const real_t tmp_kernel_op_60 = 0.66666666666666663;
+       const real_t tmp_kernel_op_61 = tmp_kernel_op_59 + tmp_kernel_op_60 - 3.0;
+       const real_t tmp_kernel_op_95 = 0.66666666666666663;
+       const real_t tmp_kernel_op_96 = 0.66666666666666663;
+       const real_t tmp_kernel_op_97 = tmp_kernel_op_95 + tmp_kernel_op_96 - 3.0;
+       const real_t tmp_kernel_op_131 = 0.16666666666666674;
+       const real_t tmp_kernel_op_132 = 0.16666666666666671;
+       const real_t tmp_kernel_op_133 = 0.66666666666666674;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t src_dof_1 = _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_2 = _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t tmp_kernel_op_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_kernel_op_6 = -tmp_kernel_op_5;
+             const real_t tmp_kernel_op_7 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_kernel_op_8 = -tmp_kernel_op_7;
+             const real_t tmp_kernel_op_9 = p_affine_0_0 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+             const real_t tmp_kernel_op_10 = (tmp_kernel_op_9*tmp_kernel_op_9);
+             const real_t tmp_kernel_op_11 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_kernel_op_12 = -tmp_kernel_op_11;
+             const real_t tmp_kernel_op_13 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_kernel_op_14 = -tmp_kernel_op_13;
+             const real_t tmp_kernel_op_15 = p_affine_0_1 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_14*0.66666666666666663;
+             const real_t tmp_kernel_op_16 = (tmp_kernel_op_15*tmp_kernel_op_15);
+             const real_t tmp_kernel_op_17 = tmp_kernel_op_10 + tmp_kernel_op_16;
+             const real_t tmp_kernel_op_25 = pow(tmp_kernel_op_17, -0.50000000000000000)*tmp_kernel_op_24;
+             const real_t tmp_kernel_op_26 = tmp_kernel_op_25*tmp_kernel_op_9;
+             const real_t tmp_kernel_op_27 = pow(tmp_kernel_op_17, -1.5000000000000000);
+             const real_t tmp_kernel_op_30 = radRayVertex + tmp_kernel_op_23*(-tmp_kernel_op_20*(tmp_kernel_op_15 + tmp_kernel_op_28) + tmp_kernel_op_4*(tmp_kernel_op_29 + tmp_kernel_op_9));
+             const real_t tmp_kernel_op_31 = tmp_kernel_op_27*tmp_kernel_op_30*1.0;
+             const real_t tmp_kernel_op_32 = tmp_kernel_op_16*tmp_kernel_op_31 + tmp_kernel_op_26*tmp_kernel_op_4;
+             const real_t tmp_kernel_op_33 = tmp_kernel_op_15*tmp_kernel_op_25;
+             const real_t tmp_kernel_op_34 = tmp_kernel_op_15*tmp_kernel_op_27*tmp_kernel_op_30*tmp_kernel_op_9*1.0 + tmp_kernel_op_20*tmp_kernel_op_26;
+             const real_t tmp_kernel_op_35 = 1.0 / (tmp_kernel_op_32*(tmp_kernel_op_10*tmp_kernel_op_27*tmp_kernel_op_30*1.0 - tmp_kernel_op_20*tmp_kernel_op_33) + tmp_kernel_op_34*(-tmp_kernel_op_15*tmp_kernel_op_31*tmp_kernel_op_9 + tmp_kernel_op_33*tmp_kernel_op_4));
+             const real_t tmp_kernel_op_36 = tmp_kernel_op_32*tmp_kernel_op_35;
+             const real_t tmp_kernel_op_37 = tmp_kernel_op_34*tmp_kernel_op_35;
+             const real_t tmp_kernel_op_38 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_37 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_36;
+             const real_t tmp_kernel_op_39 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_37 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_36;
+             const real_t tmp_kernel_op_40 = -p_affine_0_0;
+             const real_t tmp_kernel_op_41 = tmp_kernel_op_40 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.66666666666666663;
+             const real_t tmp_kernel_op_42 = (tmp_kernel_op_41*tmp_kernel_op_41);
+             const real_t tmp_kernel_op_43 = -p_affine_0_1;
+             const real_t tmp_kernel_op_44 = tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.66666666666666663 + tmp_kernel_op_43;
+             const real_t tmp_kernel_op_45 = (tmp_kernel_op_44*tmp_kernel_op_44);
+             const real_t tmp_kernel_op_46 = tmp_kernel_op_42 + tmp_kernel_op_45;
+             const real_t tmp_kernel_op_49 = pow(tmp_kernel_op_46, -0.50000000000000000)*tmp_kernel_op_48;
+             const real_t tmp_kernel_op_50 = tmp_kernel_op_41*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_51 = tmp_kernel_op_19*(rayVertex_1 + tmp_kernel_op_44) - tmp_kernel_op_3*(rayVertex_0 + tmp_kernel_op_41);
+             const real_t tmp_kernel_op_52 = pow(tmp_kernel_op_46, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_53 = tmp_kernel_op_52*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_51);
+             const real_t tmp_kernel_op_54 = tmp_kernel_op_44*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_55 = tmp_kernel_op_52*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_51);
+             const real_t tmp_kernel_op_56 = tmp_kernel_op_41*tmp_kernel_op_44;
+             const real_t tmp_kernel_op_57 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_50 - tmp_kernel_op_55*tmp_kernel_op_56)*(tmp_kernel_op_3*tmp_kernel_op_54 + tmp_kernel_op_53*tmp_kernel_op_56) - (tmp_kernel_op_19*tmp_kernel_op_54 + tmp_kernel_op_42*tmp_kernel_op_55)*(tmp_kernel_op_3*tmp_kernel_op_50 - tmp_kernel_op_45*tmp_kernel_op_53));
+             const real_t tmp_kernel_op_58 = tmp_kernel_op_57*(-tmp_kernel_op_2*tmp_kernel_op_38 - tmp_kernel_op_2*tmp_kernel_op_39);
+             const real_t tmp_kernel_op_62 = p_affine_0_0 + tmp_kernel_op_6*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_63 = (tmp_kernel_op_62*tmp_kernel_op_62);
+             const real_t tmp_kernel_op_64 = p_affine_0_1 + tmp_kernel_op_12*0.66666666666666663 + tmp_kernel_op_14*0.16666666666666666;
+             const real_t tmp_kernel_op_65 = (tmp_kernel_op_64*tmp_kernel_op_64);
+             const real_t tmp_kernel_op_66 = tmp_kernel_op_63 + tmp_kernel_op_65;
+             const real_t tmp_kernel_op_67 = tmp_kernel_op_24*pow(tmp_kernel_op_66, -0.50000000000000000);
+             const real_t tmp_kernel_op_68 = tmp_kernel_op_62*tmp_kernel_op_67;
+             const real_t tmp_kernel_op_69 = pow(tmp_kernel_op_66, -1.5000000000000000);
+             const real_t tmp_kernel_op_70 = radRayVertex + tmp_kernel_op_23*(-tmp_kernel_op_20*(tmp_kernel_op_28 + tmp_kernel_op_64) + tmp_kernel_op_4*(tmp_kernel_op_29 + tmp_kernel_op_62));
+             const real_t tmp_kernel_op_71 = tmp_kernel_op_69*tmp_kernel_op_70*1.0;
+             const real_t tmp_kernel_op_72 = tmp_kernel_op_4*tmp_kernel_op_68 + tmp_kernel_op_65*tmp_kernel_op_71;
+             const real_t tmp_kernel_op_73 = tmp_kernel_op_64*tmp_kernel_op_67;
+             const real_t tmp_kernel_op_74 = tmp_kernel_op_20*tmp_kernel_op_68 + tmp_kernel_op_62*tmp_kernel_op_64*tmp_kernel_op_69*tmp_kernel_op_70*1.0;
+             const real_t tmp_kernel_op_75 = 1.0 / (tmp_kernel_op_72*(-tmp_kernel_op_20*tmp_kernel_op_73 + tmp_kernel_op_63*tmp_kernel_op_69*tmp_kernel_op_70*1.0) + tmp_kernel_op_74*(tmp_kernel_op_4*tmp_kernel_op_73 - tmp_kernel_op_62*tmp_kernel_op_64*tmp_kernel_op_71));
+             const real_t tmp_kernel_op_76 = tmp_kernel_op_72*tmp_kernel_op_75;
+             const real_t tmp_kernel_op_77 = tmp_kernel_op_74*tmp_kernel_op_75;
+             const real_t tmp_kernel_op_78 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_77 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_76;
+             const real_t tmp_kernel_op_79 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_77 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_76;
+             const real_t tmp_kernel_op_80 = tmp_kernel_op_40 + tmp_kernel_op_5*0.66666666666666663 + tmp_kernel_op_7*0.16666666666666666;
+             const real_t tmp_kernel_op_81 = (tmp_kernel_op_80*tmp_kernel_op_80);
+             const real_t tmp_kernel_op_82 = tmp_kernel_op_11*0.66666666666666663 + tmp_kernel_op_13*0.16666666666666666 + tmp_kernel_op_43;
+             const real_t tmp_kernel_op_83 = (tmp_kernel_op_82*tmp_kernel_op_82);
+             const real_t tmp_kernel_op_84 = tmp_kernel_op_81 + tmp_kernel_op_83;
+             const real_t tmp_kernel_op_85 = tmp_kernel_op_48*pow(tmp_kernel_op_84, -0.50000000000000000);
+             const real_t tmp_kernel_op_86 = tmp_kernel_op_80*tmp_kernel_op_85;
+             const real_t tmp_kernel_op_87 = tmp_kernel_op_19*(rayVertex_1 + tmp_kernel_op_82) - tmp_kernel_op_3*(rayVertex_0 + tmp_kernel_op_80);
+             const real_t tmp_kernel_op_88 = pow(tmp_kernel_op_84, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_89 = tmp_kernel_op_88*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_87);
+             const real_t tmp_kernel_op_90 = tmp_kernel_op_82*tmp_kernel_op_85;
+             const real_t tmp_kernel_op_91 = tmp_kernel_op_88*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_87);
+             const real_t tmp_kernel_op_92 = tmp_kernel_op_80*tmp_kernel_op_82;
+             const real_t tmp_kernel_op_93 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_86 - tmp_kernel_op_91*tmp_kernel_op_92)*(tmp_kernel_op_3*tmp_kernel_op_90 + tmp_kernel_op_89*tmp_kernel_op_92) - (tmp_kernel_op_19*tmp_kernel_op_90 + tmp_kernel_op_81*tmp_kernel_op_91)*(tmp_kernel_op_3*tmp_kernel_op_86 - tmp_kernel_op_83*tmp_kernel_op_89));
+             const real_t tmp_kernel_op_94 = tmp_kernel_op_93*(-tmp_kernel_op_61*tmp_kernel_op_78 - tmp_kernel_op_61*tmp_kernel_op_79);
+             const real_t tmp_kernel_op_98 = p_affine_0_0 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_99 = (tmp_kernel_op_98*tmp_kernel_op_98);
+             const real_t tmp_kernel_op_100 = p_affine_0_1 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_14*0.16666666666666666;
+             const real_t tmp_kernel_op_101 = (tmp_kernel_op_100*tmp_kernel_op_100);
+             const real_t tmp_kernel_op_102 = tmp_kernel_op_101 + tmp_kernel_op_99;
+             const real_t tmp_kernel_op_103 = pow(tmp_kernel_op_102, -0.50000000000000000)*tmp_kernel_op_24;
+             const real_t tmp_kernel_op_104 = tmp_kernel_op_103*tmp_kernel_op_98;
+             const real_t tmp_kernel_op_105 = pow(tmp_kernel_op_102, -1.5000000000000000);
+             const real_t tmp_kernel_op_106 = radRayVertex + tmp_kernel_op_23*(-tmp_kernel_op_20*(tmp_kernel_op_100 + tmp_kernel_op_28) + tmp_kernel_op_4*(tmp_kernel_op_29 + tmp_kernel_op_98));
+             const real_t tmp_kernel_op_107 = tmp_kernel_op_105*tmp_kernel_op_106*1.0;
+             const real_t tmp_kernel_op_108 = tmp_kernel_op_101*tmp_kernel_op_107 + tmp_kernel_op_104*tmp_kernel_op_4;
+             const real_t tmp_kernel_op_109 = tmp_kernel_op_100*tmp_kernel_op_103;
+             const real_t tmp_kernel_op_110 = tmp_kernel_op_100*tmp_kernel_op_105*tmp_kernel_op_106*tmp_kernel_op_98*1.0 + tmp_kernel_op_104*tmp_kernel_op_20;
+             const real_t tmp_kernel_op_111 = 1.0 / (tmp_kernel_op_108*(tmp_kernel_op_105*tmp_kernel_op_106*tmp_kernel_op_99*1.0 - tmp_kernel_op_109*tmp_kernel_op_20) + tmp_kernel_op_110*(-tmp_kernel_op_100*tmp_kernel_op_107*tmp_kernel_op_98 + tmp_kernel_op_109*tmp_kernel_op_4));
+             const real_t tmp_kernel_op_112 = tmp_kernel_op_108*tmp_kernel_op_111;
+             const real_t tmp_kernel_op_113 = tmp_kernel_op_110*tmp_kernel_op_111;
+             const real_t tmp_kernel_op_114 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_113 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_112;
+             const real_t tmp_kernel_op_115 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_113 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_112;
+             const real_t tmp_kernel_op_116 = tmp_kernel_op_40 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.16666666666666666;
+             const real_t tmp_kernel_op_117 = (tmp_kernel_op_116*tmp_kernel_op_116);
+             const real_t tmp_kernel_op_118 = tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.16666666666666666 + tmp_kernel_op_43;
+             const real_t tmp_kernel_op_119 = (tmp_kernel_op_118*tmp_kernel_op_118);
+             const real_t tmp_kernel_op_120 = tmp_kernel_op_117 + tmp_kernel_op_119;
+             const real_t tmp_kernel_op_121 = pow(tmp_kernel_op_120, -0.50000000000000000)*tmp_kernel_op_48;
+             const real_t tmp_kernel_op_122 = tmp_kernel_op_116*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_123 = tmp_kernel_op_19*(rayVertex_1 + tmp_kernel_op_118) - tmp_kernel_op_3*(rayVertex_0 + tmp_kernel_op_116);
+             const real_t tmp_kernel_op_124 = pow(tmp_kernel_op_120, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_125 = tmp_kernel_op_124*(radRayVertex + tmp_kernel_op_123*tmp_kernel_op_47);
+             const real_t tmp_kernel_op_126 = tmp_kernel_op_118*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_127 = tmp_kernel_op_124*(radRayVertex + tmp_kernel_op_123*tmp_kernel_op_47);
+             const real_t tmp_kernel_op_128 = tmp_kernel_op_116*tmp_kernel_op_118;
+             const real_t tmp_kernel_op_129 = abs_det_jac_affine_GRAY*0.16666666666666666*abs(-(tmp_kernel_op_117*tmp_kernel_op_127 + tmp_kernel_op_126*tmp_kernel_op_19)*(-tmp_kernel_op_119*tmp_kernel_op_125 + tmp_kernel_op_122*tmp_kernel_op_3) + (tmp_kernel_op_122*tmp_kernel_op_19 - tmp_kernel_op_127*tmp_kernel_op_128)*(tmp_kernel_op_125*tmp_kernel_op_128 + tmp_kernel_op_126*tmp_kernel_op_3));
+             const real_t tmp_kernel_op_130 = tmp_kernel_op_129*(-tmp_kernel_op_114*tmp_kernel_op_97 - tmp_kernel_op_115*tmp_kernel_op_97);
+             const real_t tmp_kernel_op_134 = tmp_kernel_op_38*(tmp_kernel_op_0 - 1.0);
+             const real_t tmp_kernel_op_135 = tmp_kernel_op_134*tmp_kernel_op_57;
+             const real_t tmp_kernel_op_136 = tmp_kernel_op_78*(tmp_kernel_op_59 - 1.0);
+             const real_t tmp_kernel_op_137 = tmp_kernel_op_136*tmp_kernel_op_93;
+             const real_t tmp_kernel_op_138 = tmp_kernel_op_114*(tmp_kernel_op_95 - 1.0);
+             const real_t tmp_kernel_op_139 = tmp_kernel_op_129*tmp_kernel_op_138;
+             const real_t tmp_kernel_op_140 = tmp_kernel_op_131*tmp_kernel_op_57;
+             const real_t tmp_kernel_op_141 = tmp_kernel_op_132*tmp_kernel_op_93;
+             const real_t tmp_kernel_op_142 = tmp_kernel_op_129*tmp_kernel_op_133;
+             const real_t tmp_kernel_op_143 = tmp_kernel_op_39*(tmp_kernel_op_1 - 1.0);
+             const real_t tmp_kernel_op_144 = tmp_kernel_op_143*tmp_kernel_op_57;
+             const real_t tmp_kernel_op_145 = tmp_kernel_op_79*(tmp_kernel_op_60 - 1.0);
+             const real_t tmp_kernel_op_146 = tmp_kernel_op_145*tmp_kernel_op_93;
+             const real_t tmp_kernel_op_147 = tmp_kernel_op_115*(tmp_kernel_op_96 - 1.0);
+             const real_t tmp_kernel_op_148 = tmp_kernel_op_129*tmp_kernel_op_147;
+             const real_t tmp_kernel_op_149 = tmp_kernel_op_0*tmp_kernel_op_39;
+             const real_t tmp_kernel_op_150 = tmp_kernel_op_1*tmp_kernel_op_38;
+             const real_t tmp_kernel_op_151 = tmp_kernel_op_57*(-tmp_kernel_op_149 - tmp_kernel_op_150);
+             const real_t tmp_kernel_op_152 = tmp_kernel_op_59*tmp_kernel_op_79;
+             const real_t tmp_kernel_op_153 = tmp_kernel_op_60*tmp_kernel_op_78;
+             const real_t tmp_kernel_op_154 = tmp_kernel_op_93*(-tmp_kernel_op_152 - tmp_kernel_op_153);
+             const real_t tmp_kernel_op_155 = tmp_kernel_op_115*tmp_kernel_op_95;
+             const real_t tmp_kernel_op_156 = tmp_kernel_op_114*tmp_kernel_op_96;
+             const real_t tmp_kernel_op_157 = tmp_kernel_op_129*(-tmp_kernel_op_155 - tmp_kernel_op_156);
+             const real_t tmp_kernel_op_158 = tmp_kernel_op_57*(tmp_kernel_op_150 - tmp_kernel_op_39*(-tmp_kernel_op_0 - 1.333333333333333));
+             const real_t tmp_kernel_op_159 = tmp_kernel_op_93*(tmp_kernel_op_153 - tmp_kernel_op_79*(-tmp_kernel_op_59 + 2.666666666666667));
+             const real_t tmp_kernel_op_160 = tmp_kernel_op_129*(-tmp_kernel_op_115*(-tmp_kernel_op_95 + 2.666666666666667) + tmp_kernel_op_156);
+             const real_t tmp_kernel_op_161 = tmp_kernel_op_57*(tmp_kernel_op_149 - tmp_kernel_op_38*(-tmp_kernel_op_1 + 2.666666666666667));
+             const real_t tmp_kernel_op_162 = tmp_kernel_op_93*(tmp_kernel_op_152 - tmp_kernel_op_78*(-tmp_kernel_op_60 - 1.333333333333333));
+             const real_t tmp_kernel_op_163 = tmp_kernel_op_129*(-tmp_kernel_op_114*(-tmp_kernel_op_96 + 2.666666666666667) + tmp_kernel_op_155);
+             const real_t elMatVec_0 = src_dof_0*(tmp_kernel_op_130*tmp_kernel_op_133 + tmp_kernel_op_131*tmp_kernel_op_58 + tmp_kernel_op_132*tmp_kernel_op_94) + src_dof_1*(tmp_kernel_op_130*0.16666666666666666 + tmp_kernel_op_58*0.16666666666666666 + tmp_kernel_op_94*0.66666666666666663) + src_dof_2*(tmp_kernel_op_130*0.16666666666666666 + tmp_kernel_op_58*0.66666666666666663 + tmp_kernel_op_94*0.16666666666666666);
+             const real_t elMatVec_1 = src_dof_0*(-tmp_kernel_op_134*tmp_kernel_op_140 - tmp_kernel_op_136*tmp_kernel_op_141 - tmp_kernel_op_138*tmp_kernel_op_142) + src_dof_1*(tmp_kernel_op_135*-0.16666666666666666 + tmp_kernel_op_137*-0.66666666666666663 + tmp_kernel_op_139*-0.16666666666666666) + src_dof_2*(tmp_kernel_op_135*-0.66666666666666663 + tmp_kernel_op_137*-0.16666666666666666 + tmp_kernel_op_139*-0.16666666666666666);
+             const real_t elMatVec_2 = src_dof_0*(-tmp_kernel_op_140*tmp_kernel_op_143 - tmp_kernel_op_141*tmp_kernel_op_145 - tmp_kernel_op_142*tmp_kernel_op_147) + src_dof_1*(tmp_kernel_op_144*-0.16666666666666666 + tmp_kernel_op_146*-0.66666666666666663 + tmp_kernel_op_148*-0.16666666666666666) + src_dof_2*(tmp_kernel_op_144*-0.66666666666666663 + tmp_kernel_op_146*-0.16666666666666666 + tmp_kernel_op_148*-0.16666666666666666);
+             const real_t elMatVec_3 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_151 + tmp_kernel_op_132*tmp_kernel_op_154 + tmp_kernel_op_133*tmp_kernel_op_157) + src_dof_1*(tmp_kernel_op_151*0.16666666666666666 + tmp_kernel_op_154*0.66666666666666663 + tmp_kernel_op_157*0.16666666666666666) + src_dof_2*(tmp_kernel_op_151*0.66666666666666663 + tmp_kernel_op_154*0.16666666666666666 + tmp_kernel_op_157*0.16666666666666666);
+             const real_t elMatVec_4 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_158 + tmp_kernel_op_132*tmp_kernel_op_159 + tmp_kernel_op_133*tmp_kernel_op_160) + src_dof_1*(tmp_kernel_op_158*0.16666666666666666 + tmp_kernel_op_159*0.66666666666666663 + tmp_kernel_op_160*0.16666666666666666) + src_dof_2*(tmp_kernel_op_158*0.66666666666666663 + tmp_kernel_op_159*0.16666666666666666 + tmp_kernel_op_160*0.16666666666666666);
+             const real_t elMatVec_5 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_161 + tmp_kernel_op_132*tmp_kernel_op_162 + tmp_kernel_op_133*tmp_kernel_op_163) + src_dof_1*(tmp_kernel_op_161*0.16666666666666666 + tmp_kernel_op_162*0.66666666666666663 + tmp_kernel_op_163*0.16666666666666666) + src_dof_2*(tmp_kernel_op_161*0.66666666666666663 + tmp_kernel_op_162*0.16666666666666666 + tmp_kernel_op_163*0.16666666666666666);
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE;
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE);
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE);
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_1 = _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_2 = _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t tmp_kernel_op_5 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_kernel_op_6 = -tmp_kernel_op_5;
+             const real_t tmp_kernel_op_7 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_kernel_op_8 = -tmp_kernel_op_7;
+             const real_t tmp_kernel_op_9 = p_affine_0_0 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+             const real_t tmp_kernel_op_10 = (tmp_kernel_op_9*tmp_kernel_op_9);
+             const real_t tmp_kernel_op_11 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_kernel_op_12 = -tmp_kernel_op_11;
+             const real_t tmp_kernel_op_13 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_kernel_op_14 = -tmp_kernel_op_13;
+             const real_t tmp_kernel_op_15 = p_affine_0_1 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_14*0.66666666666666663;
+             const real_t tmp_kernel_op_16 = (tmp_kernel_op_15*tmp_kernel_op_15);
+             const real_t tmp_kernel_op_17 = tmp_kernel_op_10 + tmp_kernel_op_16;
+             const real_t tmp_kernel_op_25 = pow(tmp_kernel_op_17, -0.50000000000000000)*tmp_kernel_op_24;
+             const real_t tmp_kernel_op_26 = tmp_kernel_op_25*tmp_kernel_op_9;
+             const real_t tmp_kernel_op_27 = pow(tmp_kernel_op_17, -1.5000000000000000);
+             const real_t tmp_kernel_op_30 = radRayVertex + tmp_kernel_op_23*(-tmp_kernel_op_20*(tmp_kernel_op_15 + tmp_kernel_op_28) + tmp_kernel_op_4*(tmp_kernel_op_29 + tmp_kernel_op_9));
+             const real_t tmp_kernel_op_31 = tmp_kernel_op_27*tmp_kernel_op_30*1.0;
+             const real_t tmp_kernel_op_32 = tmp_kernel_op_16*tmp_kernel_op_31 + tmp_kernel_op_26*tmp_kernel_op_4;
+             const real_t tmp_kernel_op_33 = tmp_kernel_op_15*tmp_kernel_op_25;
+             const real_t tmp_kernel_op_34 = tmp_kernel_op_15*tmp_kernel_op_27*tmp_kernel_op_30*tmp_kernel_op_9*1.0 + tmp_kernel_op_20*tmp_kernel_op_26;
+             const real_t tmp_kernel_op_35 = 1.0 / (tmp_kernel_op_32*(tmp_kernel_op_10*tmp_kernel_op_27*tmp_kernel_op_30*1.0 - tmp_kernel_op_20*tmp_kernel_op_33) + tmp_kernel_op_34*(-tmp_kernel_op_15*tmp_kernel_op_31*tmp_kernel_op_9 + tmp_kernel_op_33*tmp_kernel_op_4));
+             const real_t tmp_kernel_op_36 = tmp_kernel_op_32*tmp_kernel_op_35;
+             const real_t tmp_kernel_op_37 = tmp_kernel_op_34*tmp_kernel_op_35;
+             const real_t tmp_kernel_op_38 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_37 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_36;
+             const real_t tmp_kernel_op_39 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_37 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_36;
+             const real_t tmp_kernel_op_40 = -p_affine_0_0;
+             const real_t tmp_kernel_op_41 = tmp_kernel_op_40 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.66666666666666663;
+             const real_t tmp_kernel_op_42 = (tmp_kernel_op_41*tmp_kernel_op_41);
+             const real_t tmp_kernel_op_43 = -p_affine_0_1;
+             const real_t tmp_kernel_op_44 = tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.66666666666666663 + tmp_kernel_op_43;
+             const real_t tmp_kernel_op_45 = (tmp_kernel_op_44*tmp_kernel_op_44);
+             const real_t tmp_kernel_op_46 = tmp_kernel_op_42 + tmp_kernel_op_45;
+             const real_t tmp_kernel_op_49 = pow(tmp_kernel_op_46, -0.50000000000000000)*tmp_kernel_op_48;
+             const real_t tmp_kernel_op_50 = tmp_kernel_op_41*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_51 = tmp_kernel_op_19*(rayVertex_1 + tmp_kernel_op_44) - tmp_kernel_op_3*(rayVertex_0 + tmp_kernel_op_41);
+             const real_t tmp_kernel_op_52 = pow(tmp_kernel_op_46, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_53 = tmp_kernel_op_52*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_51);
+             const real_t tmp_kernel_op_54 = tmp_kernel_op_44*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_55 = tmp_kernel_op_52*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_51);
+             const real_t tmp_kernel_op_56 = tmp_kernel_op_41*tmp_kernel_op_44;
+             const real_t tmp_kernel_op_57 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_50 - tmp_kernel_op_55*tmp_kernel_op_56)*(tmp_kernel_op_3*tmp_kernel_op_54 + tmp_kernel_op_53*tmp_kernel_op_56) - (tmp_kernel_op_19*tmp_kernel_op_54 + tmp_kernel_op_42*tmp_kernel_op_55)*(tmp_kernel_op_3*tmp_kernel_op_50 - tmp_kernel_op_45*tmp_kernel_op_53));
+             const real_t tmp_kernel_op_58 = tmp_kernel_op_57*(-tmp_kernel_op_2*tmp_kernel_op_38 - tmp_kernel_op_2*tmp_kernel_op_39);
+             const real_t tmp_kernel_op_62 = p_affine_0_0 + tmp_kernel_op_6*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_63 = (tmp_kernel_op_62*tmp_kernel_op_62);
+             const real_t tmp_kernel_op_64 = p_affine_0_1 + tmp_kernel_op_12*0.66666666666666663 + tmp_kernel_op_14*0.16666666666666666;
+             const real_t tmp_kernel_op_65 = (tmp_kernel_op_64*tmp_kernel_op_64);
+             const real_t tmp_kernel_op_66 = tmp_kernel_op_63 + tmp_kernel_op_65;
+             const real_t tmp_kernel_op_67 = tmp_kernel_op_24*pow(tmp_kernel_op_66, -0.50000000000000000);
+             const real_t tmp_kernel_op_68 = tmp_kernel_op_62*tmp_kernel_op_67;
+             const real_t tmp_kernel_op_69 = pow(tmp_kernel_op_66, -1.5000000000000000);
+             const real_t tmp_kernel_op_70 = radRayVertex + tmp_kernel_op_23*(-tmp_kernel_op_20*(tmp_kernel_op_28 + tmp_kernel_op_64) + tmp_kernel_op_4*(tmp_kernel_op_29 + tmp_kernel_op_62));
+             const real_t tmp_kernel_op_71 = tmp_kernel_op_69*tmp_kernel_op_70*1.0;
+             const real_t tmp_kernel_op_72 = tmp_kernel_op_4*tmp_kernel_op_68 + tmp_kernel_op_65*tmp_kernel_op_71;
+             const real_t tmp_kernel_op_73 = tmp_kernel_op_64*tmp_kernel_op_67;
+             const real_t tmp_kernel_op_74 = tmp_kernel_op_20*tmp_kernel_op_68 + tmp_kernel_op_62*tmp_kernel_op_64*tmp_kernel_op_69*tmp_kernel_op_70*1.0;
+             const real_t tmp_kernel_op_75 = 1.0 / (tmp_kernel_op_72*(-tmp_kernel_op_20*tmp_kernel_op_73 + tmp_kernel_op_63*tmp_kernel_op_69*tmp_kernel_op_70*1.0) + tmp_kernel_op_74*(tmp_kernel_op_4*tmp_kernel_op_73 - tmp_kernel_op_62*tmp_kernel_op_64*tmp_kernel_op_71));
+             const real_t tmp_kernel_op_76 = tmp_kernel_op_72*tmp_kernel_op_75;
+             const real_t tmp_kernel_op_77 = tmp_kernel_op_74*tmp_kernel_op_75;
+             const real_t tmp_kernel_op_78 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_77 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_76;
+             const real_t tmp_kernel_op_79 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_77 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_76;
+             const real_t tmp_kernel_op_80 = tmp_kernel_op_40 + tmp_kernel_op_5*0.66666666666666663 + tmp_kernel_op_7*0.16666666666666666;
+             const real_t tmp_kernel_op_81 = (tmp_kernel_op_80*tmp_kernel_op_80);
+             const real_t tmp_kernel_op_82 = tmp_kernel_op_11*0.66666666666666663 + tmp_kernel_op_13*0.16666666666666666 + tmp_kernel_op_43;
+             const real_t tmp_kernel_op_83 = (tmp_kernel_op_82*tmp_kernel_op_82);
+             const real_t tmp_kernel_op_84 = tmp_kernel_op_81 + tmp_kernel_op_83;
+             const real_t tmp_kernel_op_85 = tmp_kernel_op_48*pow(tmp_kernel_op_84, -0.50000000000000000);
+             const real_t tmp_kernel_op_86 = tmp_kernel_op_80*tmp_kernel_op_85;
+             const real_t tmp_kernel_op_87 = tmp_kernel_op_19*(rayVertex_1 + tmp_kernel_op_82) - tmp_kernel_op_3*(rayVertex_0 + tmp_kernel_op_80);
+             const real_t tmp_kernel_op_88 = pow(tmp_kernel_op_84, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_89 = tmp_kernel_op_88*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_87);
+             const real_t tmp_kernel_op_90 = tmp_kernel_op_82*tmp_kernel_op_85;
+             const real_t tmp_kernel_op_91 = tmp_kernel_op_88*(radRayVertex + tmp_kernel_op_47*tmp_kernel_op_87);
+             const real_t tmp_kernel_op_92 = tmp_kernel_op_80*tmp_kernel_op_82;
+             const real_t tmp_kernel_op_93 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_19*tmp_kernel_op_86 - tmp_kernel_op_91*tmp_kernel_op_92)*(tmp_kernel_op_3*tmp_kernel_op_90 + tmp_kernel_op_89*tmp_kernel_op_92) - (tmp_kernel_op_19*tmp_kernel_op_90 + tmp_kernel_op_81*tmp_kernel_op_91)*(tmp_kernel_op_3*tmp_kernel_op_86 - tmp_kernel_op_83*tmp_kernel_op_89));
+             const real_t tmp_kernel_op_94 = tmp_kernel_op_93*(-tmp_kernel_op_61*tmp_kernel_op_78 - tmp_kernel_op_61*tmp_kernel_op_79);
+             const real_t tmp_kernel_op_98 = p_affine_0_0 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_99 = (tmp_kernel_op_98*tmp_kernel_op_98);
+             const real_t tmp_kernel_op_100 = p_affine_0_1 + tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_14*0.16666666666666666;
+             const real_t tmp_kernel_op_101 = (tmp_kernel_op_100*tmp_kernel_op_100);
+             const real_t tmp_kernel_op_102 = tmp_kernel_op_101 + tmp_kernel_op_99;
+             const real_t tmp_kernel_op_103 = pow(tmp_kernel_op_102, -0.50000000000000000)*tmp_kernel_op_24;
+             const real_t tmp_kernel_op_104 = tmp_kernel_op_103*tmp_kernel_op_98;
+             const real_t tmp_kernel_op_105 = pow(tmp_kernel_op_102, -1.5000000000000000);
+             const real_t tmp_kernel_op_106 = radRayVertex + tmp_kernel_op_23*(-tmp_kernel_op_20*(tmp_kernel_op_100 + tmp_kernel_op_28) + tmp_kernel_op_4*(tmp_kernel_op_29 + tmp_kernel_op_98));
+             const real_t tmp_kernel_op_107 = tmp_kernel_op_105*tmp_kernel_op_106*1.0;
+             const real_t tmp_kernel_op_108 = tmp_kernel_op_101*tmp_kernel_op_107 + tmp_kernel_op_104*tmp_kernel_op_4;
+             const real_t tmp_kernel_op_109 = tmp_kernel_op_100*tmp_kernel_op_103;
+             const real_t tmp_kernel_op_110 = tmp_kernel_op_100*tmp_kernel_op_105*tmp_kernel_op_106*tmp_kernel_op_98*1.0 + tmp_kernel_op_104*tmp_kernel_op_20;
+             const real_t tmp_kernel_op_111 = 1.0 / (tmp_kernel_op_108*(tmp_kernel_op_105*tmp_kernel_op_106*tmp_kernel_op_99*1.0 - tmp_kernel_op_109*tmp_kernel_op_20) + tmp_kernel_op_110*(-tmp_kernel_op_100*tmp_kernel_op_107*tmp_kernel_op_98 + tmp_kernel_op_109*tmp_kernel_op_4));
+             const real_t tmp_kernel_op_112 = tmp_kernel_op_108*tmp_kernel_op_111;
+             const real_t tmp_kernel_op_113 = tmp_kernel_op_110*tmp_kernel_op_111;
+             const real_t tmp_kernel_op_114 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_113 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_112;
+             const real_t tmp_kernel_op_115 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_113 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_112;
+             const real_t tmp_kernel_op_116 = tmp_kernel_op_40 + tmp_kernel_op_5*0.16666666666666666 + tmp_kernel_op_7*0.16666666666666666;
+             const real_t tmp_kernel_op_117 = (tmp_kernel_op_116*tmp_kernel_op_116);
+             const real_t tmp_kernel_op_118 = tmp_kernel_op_11*0.16666666666666666 + tmp_kernel_op_13*0.16666666666666666 + tmp_kernel_op_43;
+             const real_t tmp_kernel_op_119 = (tmp_kernel_op_118*tmp_kernel_op_118);
+             const real_t tmp_kernel_op_120 = tmp_kernel_op_117 + tmp_kernel_op_119;
+             const real_t tmp_kernel_op_121 = pow(tmp_kernel_op_120, -0.50000000000000000)*tmp_kernel_op_48;
+             const real_t tmp_kernel_op_122 = tmp_kernel_op_116*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_123 = tmp_kernel_op_19*(rayVertex_1 + tmp_kernel_op_118) - tmp_kernel_op_3*(rayVertex_0 + tmp_kernel_op_116);
+             const real_t tmp_kernel_op_124 = pow(tmp_kernel_op_120, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_125 = tmp_kernel_op_124*(radRayVertex + tmp_kernel_op_123*tmp_kernel_op_47);
+             const real_t tmp_kernel_op_126 = tmp_kernel_op_118*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_127 = tmp_kernel_op_124*(radRayVertex + tmp_kernel_op_123*tmp_kernel_op_47);
+             const real_t tmp_kernel_op_128 = tmp_kernel_op_116*tmp_kernel_op_118;
+             const real_t tmp_kernel_op_129 = abs_det_jac_affine_BLUE*0.16666666666666666*abs(-(tmp_kernel_op_117*tmp_kernel_op_127 + tmp_kernel_op_126*tmp_kernel_op_19)*(-tmp_kernel_op_119*tmp_kernel_op_125 + tmp_kernel_op_122*tmp_kernel_op_3) + (tmp_kernel_op_122*tmp_kernel_op_19 - tmp_kernel_op_127*tmp_kernel_op_128)*(tmp_kernel_op_125*tmp_kernel_op_128 + tmp_kernel_op_126*tmp_kernel_op_3));
+             const real_t tmp_kernel_op_130 = tmp_kernel_op_129*(-tmp_kernel_op_114*tmp_kernel_op_97 - tmp_kernel_op_115*tmp_kernel_op_97);
+             const real_t tmp_kernel_op_134 = tmp_kernel_op_38*(tmp_kernel_op_0 - 1.0);
+             const real_t tmp_kernel_op_135 = tmp_kernel_op_134*tmp_kernel_op_57;
+             const real_t tmp_kernel_op_136 = tmp_kernel_op_78*(tmp_kernel_op_59 - 1.0);
+             const real_t tmp_kernel_op_137 = tmp_kernel_op_136*tmp_kernel_op_93;
+             const real_t tmp_kernel_op_138 = tmp_kernel_op_114*(tmp_kernel_op_95 - 1.0);
+             const real_t tmp_kernel_op_139 = tmp_kernel_op_129*tmp_kernel_op_138;
+             const real_t tmp_kernel_op_140 = tmp_kernel_op_131*tmp_kernel_op_57;
+             const real_t tmp_kernel_op_141 = tmp_kernel_op_132*tmp_kernel_op_93;
+             const real_t tmp_kernel_op_142 = tmp_kernel_op_129*tmp_kernel_op_133;
+             const real_t tmp_kernel_op_143 = tmp_kernel_op_39*(tmp_kernel_op_1 - 1.0);
+             const real_t tmp_kernel_op_144 = tmp_kernel_op_143*tmp_kernel_op_57;
+             const real_t tmp_kernel_op_145 = tmp_kernel_op_79*(tmp_kernel_op_60 - 1.0);
+             const real_t tmp_kernel_op_146 = tmp_kernel_op_145*tmp_kernel_op_93;
+             const real_t tmp_kernel_op_147 = tmp_kernel_op_115*(tmp_kernel_op_96 - 1.0);
+             const real_t tmp_kernel_op_148 = tmp_kernel_op_129*tmp_kernel_op_147;
+             const real_t tmp_kernel_op_149 = tmp_kernel_op_0*tmp_kernel_op_39;
+             const real_t tmp_kernel_op_150 = tmp_kernel_op_1*tmp_kernel_op_38;
+             const real_t tmp_kernel_op_151 = tmp_kernel_op_57*(-tmp_kernel_op_149 - tmp_kernel_op_150);
+             const real_t tmp_kernel_op_152 = tmp_kernel_op_59*tmp_kernel_op_79;
+             const real_t tmp_kernel_op_153 = tmp_kernel_op_60*tmp_kernel_op_78;
+             const real_t tmp_kernel_op_154 = tmp_kernel_op_93*(-tmp_kernel_op_152 - tmp_kernel_op_153);
+             const real_t tmp_kernel_op_155 = tmp_kernel_op_115*tmp_kernel_op_95;
+             const real_t tmp_kernel_op_156 = tmp_kernel_op_114*tmp_kernel_op_96;
+             const real_t tmp_kernel_op_157 = tmp_kernel_op_129*(-tmp_kernel_op_155 - tmp_kernel_op_156);
+             const real_t tmp_kernel_op_158 = tmp_kernel_op_57*(tmp_kernel_op_150 - tmp_kernel_op_39*(-tmp_kernel_op_0 - 1.333333333333333));
+             const real_t tmp_kernel_op_159 = tmp_kernel_op_93*(tmp_kernel_op_153 - tmp_kernel_op_79*(-tmp_kernel_op_59 + 2.666666666666667));
+             const real_t tmp_kernel_op_160 = tmp_kernel_op_129*(-tmp_kernel_op_115*(-tmp_kernel_op_95 + 2.666666666666667) + tmp_kernel_op_156);
+             const real_t tmp_kernel_op_161 = tmp_kernel_op_57*(tmp_kernel_op_149 - tmp_kernel_op_38*(-tmp_kernel_op_1 + 2.666666666666667));
+             const real_t tmp_kernel_op_162 = tmp_kernel_op_93*(tmp_kernel_op_152 - tmp_kernel_op_78*(-tmp_kernel_op_60 - 1.333333333333333));
+             const real_t tmp_kernel_op_163 = tmp_kernel_op_129*(-tmp_kernel_op_114*(-tmp_kernel_op_96 + 2.666666666666667) + tmp_kernel_op_155);
+             const real_t elMatVec_0 = src_dof_0*(tmp_kernel_op_130*tmp_kernel_op_133 + tmp_kernel_op_131*tmp_kernel_op_58 + tmp_kernel_op_132*tmp_kernel_op_94) + src_dof_1*(tmp_kernel_op_130*0.16666666666666666 + tmp_kernel_op_58*0.16666666666666666 + tmp_kernel_op_94*0.66666666666666663) + src_dof_2*(tmp_kernel_op_130*0.16666666666666666 + tmp_kernel_op_58*0.66666666666666663 + tmp_kernel_op_94*0.16666666666666666);
+             const real_t elMatVec_1 = src_dof_0*(-tmp_kernel_op_134*tmp_kernel_op_140 - tmp_kernel_op_136*tmp_kernel_op_141 - tmp_kernel_op_138*tmp_kernel_op_142) + src_dof_1*(tmp_kernel_op_135*-0.16666666666666666 + tmp_kernel_op_137*-0.66666666666666663 + tmp_kernel_op_139*-0.16666666666666666) + src_dof_2*(tmp_kernel_op_135*-0.66666666666666663 + tmp_kernel_op_137*-0.16666666666666666 + tmp_kernel_op_139*-0.16666666666666666);
+             const real_t elMatVec_2 = src_dof_0*(-tmp_kernel_op_140*tmp_kernel_op_143 - tmp_kernel_op_141*tmp_kernel_op_145 - tmp_kernel_op_142*tmp_kernel_op_147) + src_dof_1*(tmp_kernel_op_144*-0.16666666666666666 + tmp_kernel_op_146*-0.66666666666666663 + tmp_kernel_op_148*-0.16666666666666666) + src_dof_2*(tmp_kernel_op_144*-0.66666666666666663 + tmp_kernel_op_146*-0.16666666666666666 + tmp_kernel_op_148*-0.16666666666666666);
+             const real_t elMatVec_3 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_151 + tmp_kernel_op_132*tmp_kernel_op_154 + tmp_kernel_op_133*tmp_kernel_op_157) + src_dof_1*(tmp_kernel_op_151*0.16666666666666666 + tmp_kernel_op_154*0.66666666666666663 + tmp_kernel_op_157*0.16666666666666666) + src_dof_2*(tmp_kernel_op_151*0.66666666666666663 + tmp_kernel_op_154*0.16666666666666666 + tmp_kernel_op_157*0.16666666666666666);
+             const real_t elMatVec_4 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_158 + tmp_kernel_op_132*tmp_kernel_op_159 + tmp_kernel_op_133*tmp_kernel_op_160) + src_dof_1*(tmp_kernel_op_158*0.16666666666666666 + tmp_kernel_op_159*0.66666666666666663 + tmp_kernel_op_160*0.16666666666666666) + src_dof_2*(tmp_kernel_op_158*0.66666666666666663 + tmp_kernel_op_159*0.16666666666666666 + tmp_kernel_op_160*0.16666666666666666);
+             const real_t elMatVec_5 = src_dof_0*(tmp_kernel_op_131*tmp_kernel_op_161 + tmp_kernel_op_132*tmp_kernel_op_162 + tmp_kernel_op_133*tmp_kernel_op_163) + src_dof_1*(tmp_kernel_op_161*0.16666666666666666 + tmp_kernel_op_162*0.66666666666666663 + tmp_kernel_op_163*0.16666666666666666) + src_dof_2*(tmp_kernel_op_161*0.66666666666666663 + tmp_kernel_op_162*0.16666666666666666 + tmp_kernel_op_163*0.16666666666666666);
+             _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1];
+             _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..31b411949845839fc1033f532da2966ed121877c
--- /dev/null
+++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_macro_2D.cpp
@@ -0,0 +1,579 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P1ToP2ElementwiseGradientAnnulusMap_1_0.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P1ToP2ElementwiseGradientAnnulusMap_1_0::toMatrix_macro_2D( idx_t * RESTRICT  _data_dstEdge, idx_t * RESTRICT  _data_dstVertex, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY;
+       const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY);
+       const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY);
+       const real_t tmp_kernel_op_0 = 0.16666666666666674;
+       const real_t tmp_kernel_op_1 = 0.66666666666666663;
+       const real_t tmp_kernel_op_2 = 2.6666666666666665;
+       const real_t tmp_kernel_op_3 = tmp_kernel_op_1 + tmp_kernel_op_2 - 3.0;
+       const real_t tmp_kernel_op_4 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_kernel_op_5 = -tmp_kernel_op_4;
+       const real_t tmp_kernel_op_19 = rayVertex_0 - refVertex_0;
+       const real_t tmp_kernel_op_20 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_kernel_op_21 = -tmp_kernel_op_20;
+       const real_t tmp_kernel_op_22 = rayVertex_1 - refVertex_1;
+       const real_t tmp_kernel_op_23 = radRayVertex - radRefVertex;
+       const real_t tmp_kernel_op_24 = -tmp_kernel_op_23*1.0 / (-tmp_kernel_op_19*tmp_kernel_op_5 + tmp_kernel_op_21*tmp_kernel_op_22);
+       const real_t tmp_kernel_op_25 = tmp_kernel_op_24*1.0;
+       const real_t tmp_kernel_op_29 = -rayVertex_1;
+       const real_t tmp_kernel_op_30 = -rayVertex_0;
+       const real_t tmp_kernel_op_48 = tmp_kernel_op_23*1.0 / (tmp_kernel_op_19*tmp_kernel_op_4 - tmp_kernel_op_20*tmp_kernel_op_22);
+       const real_t tmp_kernel_op_49 = tmp_kernel_op_48*1.0;
+       const real_t tmp_kernel_op_60 = 0.16666666666666671;
+       const real_t tmp_kernel_op_61 = 2.6666666666666665;
+       const real_t tmp_kernel_op_62 = 0.66666666666666663;
+       const real_t tmp_kernel_op_63 = tmp_kernel_op_61 + tmp_kernel_op_62 - 3.0;
+       const real_t tmp_kernel_op_97 = 0.66666666666666674;
+       const real_t tmp_kernel_op_98 = 0.66666666666666663;
+       const real_t tmp_kernel_op_99 = 0.66666666666666663;
+       const real_t tmp_kernel_op_100 = tmp_kernel_op_98 + tmp_kernel_op_99 - 3.0;
+       {
+          /* FaceType.GRAY */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t tmp_kernel_op_6 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_kernel_op_7 = -tmp_kernel_op_6;
+             const real_t tmp_kernel_op_8 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_kernel_op_9 = -tmp_kernel_op_8;
+             const real_t tmp_kernel_op_10 = p_affine_0_0 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_9*0.66666666666666663;
+             const real_t tmp_kernel_op_11 = (tmp_kernel_op_10*tmp_kernel_op_10);
+             const real_t tmp_kernel_op_12 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_kernel_op_13 = -tmp_kernel_op_12;
+             const real_t tmp_kernel_op_14 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_kernel_op_15 = -tmp_kernel_op_14;
+             const real_t tmp_kernel_op_16 = p_affine_0_1 + tmp_kernel_op_13*0.16666666666666666 + tmp_kernel_op_15*0.66666666666666663;
+             const real_t tmp_kernel_op_17 = (tmp_kernel_op_16*tmp_kernel_op_16);
+             const real_t tmp_kernel_op_18 = tmp_kernel_op_11 + tmp_kernel_op_17;
+             const real_t tmp_kernel_op_26 = pow(tmp_kernel_op_18, -0.50000000000000000)*tmp_kernel_op_25;
+             const real_t tmp_kernel_op_27 = tmp_kernel_op_10*tmp_kernel_op_26;
+             const real_t tmp_kernel_op_28 = pow(tmp_kernel_op_18, -1.5000000000000000);
+             const real_t tmp_kernel_op_31 = radRayVertex + tmp_kernel_op_24*(-tmp_kernel_op_21*(tmp_kernel_op_16 + tmp_kernel_op_29) + tmp_kernel_op_5*(tmp_kernel_op_10 + tmp_kernel_op_30));
+             const real_t tmp_kernel_op_32 = tmp_kernel_op_28*tmp_kernel_op_31*1.0;
+             const real_t tmp_kernel_op_33 = tmp_kernel_op_17*tmp_kernel_op_32 + tmp_kernel_op_27*tmp_kernel_op_5;
+             const real_t tmp_kernel_op_34 = tmp_kernel_op_16*tmp_kernel_op_26;
+             const real_t tmp_kernel_op_35 = tmp_kernel_op_10*tmp_kernel_op_16*tmp_kernel_op_28*tmp_kernel_op_31*1.0 + tmp_kernel_op_21*tmp_kernel_op_27;
+             const real_t tmp_kernel_op_36 = 1.0 / (tmp_kernel_op_33*(tmp_kernel_op_11*tmp_kernel_op_28*tmp_kernel_op_31*1.0 - tmp_kernel_op_21*tmp_kernel_op_34) + tmp_kernel_op_35*(-tmp_kernel_op_10*tmp_kernel_op_16*tmp_kernel_op_32 + tmp_kernel_op_34*tmp_kernel_op_5));
+             const real_t tmp_kernel_op_37 = tmp_kernel_op_33*tmp_kernel_op_36;
+             const real_t tmp_kernel_op_38 = tmp_kernel_op_35*tmp_kernel_op_36;
+             const real_t tmp_kernel_op_39 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_38 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_37;
+             const real_t tmp_kernel_op_40 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_38 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_37;
+             const real_t tmp_kernel_op_41 = -p_affine_0_0;
+             const real_t tmp_kernel_op_42 = tmp_kernel_op_41 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+             const real_t tmp_kernel_op_43 = (tmp_kernel_op_42*tmp_kernel_op_42);
+             const real_t tmp_kernel_op_44 = -p_affine_0_1;
+             const real_t tmp_kernel_op_45 = tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_14*0.66666666666666663 + tmp_kernel_op_44;
+             const real_t tmp_kernel_op_46 = (tmp_kernel_op_45*tmp_kernel_op_45);
+             const real_t tmp_kernel_op_47 = tmp_kernel_op_43 + tmp_kernel_op_46;
+             const real_t tmp_kernel_op_50 = pow(tmp_kernel_op_47, -0.50000000000000000)*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_51 = tmp_kernel_op_42*tmp_kernel_op_50;
+             const real_t tmp_kernel_op_52 = tmp_kernel_op_20*(rayVertex_1 + tmp_kernel_op_45) - tmp_kernel_op_4*(rayVertex_0 + tmp_kernel_op_42);
+             const real_t tmp_kernel_op_53 = pow(tmp_kernel_op_47, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_54 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+             const real_t tmp_kernel_op_55 = tmp_kernel_op_45*tmp_kernel_op_50;
+             const real_t tmp_kernel_op_56 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+             const real_t tmp_kernel_op_57 = tmp_kernel_op_42*tmp_kernel_op_45;
+             const real_t tmp_kernel_op_58 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_20*tmp_kernel_op_51 - tmp_kernel_op_56*tmp_kernel_op_57)*(tmp_kernel_op_4*tmp_kernel_op_55 + tmp_kernel_op_54*tmp_kernel_op_57) - (tmp_kernel_op_20*tmp_kernel_op_55 + tmp_kernel_op_43*tmp_kernel_op_56)*(tmp_kernel_op_4*tmp_kernel_op_51 - tmp_kernel_op_46*tmp_kernel_op_54));
+             const real_t tmp_kernel_op_59 = tmp_kernel_op_58*(-tmp_kernel_op_3*tmp_kernel_op_39 - tmp_kernel_op_3*tmp_kernel_op_40);
+             const real_t tmp_kernel_op_64 = p_affine_0_0 + tmp_kernel_op_7*0.66666666666666663 + tmp_kernel_op_9*0.16666666666666666;
+             const real_t tmp_kernel_op_65 = (tmp_kernel_op_64*tmp_kernel_op_64);
+             const real_t tmp_kernel_op_66 = p_affine_0_1 + tmp_kernel_op_13*0.66666666666666663 + tmp_kernel_op_15*0.16666666666666666;
+             const real_t tmp_kernel_op_67 = (tmp_kernel_op_66*tmp_kernel_op_66);
+             const real_t tmp_kernel_op_68 = tmp_kernel_op_65 + tmp_kernel_op_67;
+             const real_t tmp_kernel_op_69 = tmp_kernel_op_25*pow(tmp_kernel_op_68, -0.50000000000000000);
+             const real_t tmp_kernel_op_70 = tmp_kernel_op_64*tmp_kernel_op_69;
+             const real_t tmp_kernel_op_71 = pow(tmp_kernel_op_68, -1.5000000000000000);
+             const real_t tmp_kernel_op_72 = radRayVertex + tmp_kernel_op_24*(-tmp_kernel_op_21*(tmp_kernel_op_29 + tmp_kernel_op_66) + tmp_kernel_op_5*(tmp_kernel_op_30 + tmp_kernel_op_64));
+             const real_t tmp_kernel_op_73 = tmp_kernel_op_71*tmp_kernel_op_72*1.0;
+             const real_t tmp_kernel_op_74 = tmp_kernel_op_5*tmp_kernel_op_70 + tmp_kernel_op_67*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_75 = tmp_kernel_op_66*tmp_kernel_op_69;
+             const real_t tmp_kernel_op_76 = tmp_kernel_op_21*tmp_kernel_op_70 + tmp_kernel_op_64*tmp_kernel_op_66*tmp_kernel_op_71*tmp_kernel_op_72*1.0;
+             const real_t tmp_kernel_op_77 = 1.0 / (tmp_kernel_op_74*(-tmp_kernel_op_21*tmp_kernel_op_75 + tmp_kernel_op_65*tmp_kernel_op_71*tmp_kernel_op_72*1.0) + tmp_kernel_op_76*(tmp_kernel_op_5*tmp_kernel_op_75 - tmp_kernel_op_64*tmp_kernel_op_66*tmp_kernel_op_73));
+             const real_t tmp_kernel_op_78 = tmp_kernel_op_74*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_79 = tmp_kernel_op_76*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_80 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_79 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_78;
+             const real_t tmp_kernel_op_81 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_79 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_78;
+             const real_t tmp_kernel_op_82 = tmp_kernel_op_41 + tmp_kernel_op_6*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_83 = (tmp_kernel_op_82*tmp_kernel_op_82);
+             const real_t tmp_kernel_op_84 = tmp_kernel_op_12*0.66666666666666663 + tmp_kernel_op_14*0.16666666666666666 + tmp_kernel_op_44;
+             const real_t tmp_kernel_op_85 = (tmp_kernel_op_84*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_86 = tmp_kernel_op_83 + tmp_kernel_op_85;
+             const real_t tmp_kernel_op_87 = tmp_kernel_op_49*pow(tmp_kernel_op_86, -0.50000000000000000);
+             const real_t tmp_kernel_op_88 = tmp_kernel_op_82*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_89 = tmp_kernel_op_20*(rayVertex_1 + tmp_kernel_op_84) - tmp_kernel_op_4*(rayVertex_0 + tmp_kernel_op_82);
+             const real_t tmp_kernel_op_90 = pow(tmp_kernel_op_86, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_91 = tmp_kernel_op_90*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_89);
+             const real_t tmp_kernel_op_92 = tmp_kernel_op_84*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_93 = tmp_kernel_op_90*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_89);
+             const real_t tmp_kernel_op_94 = tmp_kernel_op_82*tmp_kernel_op_84;
+             const real_t tmp_kernel_op_95 = abs_det_jac_affine_GRAY*0.16666666666666666*abs((tmp_kernel_op_20*tmp_kernel_op_88 - tmp_kernel_op_93*tmp_kernel_op_94)*(tmp_kernel_op_4*tmp_kernel_op_92 + tmp_kernel_op_91*tmp_kernel_op_94) - (tmp_kernel_op_20*tmp_kernel_op_92 + tmp_kernel_op_83*tmp_kernel_op_93)*(tmp_kernel_op_4*tmp_kernel_op_88 - tmp_kernel_op_85*tmp_kernel_op_91));
+             const real_t tmp_kernel_op_96 = tmp_kernel_op_95*(-tmp_kernel_op_63*tmp_kernel_op_80 - tmp_kernel_op_63*tmp_kernel_op_81);
+             const real_t tmp_kernel_op_101 = p_affine_0_0 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_9*0.16666666666666666;
+             const real_t tmp_kernel_op_102 = (tmp_kernel_op_101*tmp_kernel_op_101);
+             const real_t tmp_kernel_op_103 = p_affine_0_1 + tmp_kernel_op_13*0.16666666666666666 + tmp_kernel_op_15*0.16666666666666666;
+             const real_t tmp_kernel_op_104 = (tmp_kernel_op_103*tmp_kernel_op_103);
+             const real_t tmp_kernel_op_105 = tmp_kernel_op_102 + tmp_kernel_op_104;
+             const real_t tmp_kernel_op_106 = pow(tmp_kernel_op_105, -0.50000000000000000)*tmp_kernel_op_25;
+             const real_t tmp_kernel_op_107 = tmp_kernel_op_101*tmp_kernel_op_106;
+             const real_t tmp_kernel_op_108 = pow(tmp_kernel_op_105, -1.5000000000000000);
+             const real_t tmp_kernel_op_109 = radRayVertex + tmp_kernel_op_24*(-tmp_kernel_op_21*(tmp_kernel_op_103 + tmp_kernel_op_29) + tmp_kernel_op_5*(tmp_kernel_op_101 + tmp_kernel_op_30));
+             const real_t tmp_kernel_op_110 = tmp_kernel_op_108*tmp_kernel_op_109*1.0;
+             const real_t tmp_kernel_op_111 = tmp_kernel_op_104*tmp_kernel_op_110 + tmp_kernel_op_107*tmp_kernel_op_5;
+             const real_t tmp_kernel_op_112 = tmp_kernel_op_103*tmp_kernel_op_106;
+             const real_t tmp_kernel_op_113 = tmp_kernel_op_101*tmp_kernel_op_103*tmp_kernel_op_108*tmp_kernel_op_109*1.0 + tmp_kernel_op_107*tmp_kernel_op_21;
+             const real_t tmp_kernel_op_114 = 1.0 / (tmp_kernel_op_111*(tmp_kernel_op_102*tmp_kernel_op_108*tmp_kernel_op_109*1.0 - tmp_kernel_op_112*tmp_kernel_op_21) + tmp_kernel_op_113*(-tmp_kernel_op_101*tmp_kernel_op_103*tmp_kernel_op_110 + tmp_kernel_op_112*tmp_kernel_op_5));
+             const real_t tmp_kernel_op_115 = tmp_kernel_op_111*tmp_kernel_op_114;
+             const real_t tmp_kernel_op_116 = tmp_kernel_op_113*tmp_kernel_op_114;
+             const real_t tmp_kernel_op_117 = jac_affine_inv_0_0_GRAY*tmp_kernel_op_116 + jac_affine_inv_0_1_GRAY*tmp_kernel_op_115;
+             const real_t tmp_kernel_op_118 = jac_affine_inv_1_0_GRAY*tmp_kernel_op_116 + jac_affine_inv_1_1_GRAY*tmp_kernel_op_115;
+             const real_t tmp_kernel_op_119 = tmp_kernel_op_41 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_120 = (tmp_kernel_op_119*tmp_kernel_op_119);
+             const real_t tmp_kernel_op_121 = tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_14*0.16666666666666666 + tmp_kernel_op_44;
+             const real_t tmp_kernel_op_122 = (tmp_kernel_op_121*tmp_kernel_op_121);
+             const real_t tmp_kernel_op_123 = tmp_kernel_op_120 + tmp_kernel_op_122;
+             const real_t tmp_kernel_op_124 = pow(tmp_kernel_op_123, -0.50000000000000000)*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_125 = tmp_kernel_op_119*tmp_kernel_op_124;
+             const real_t tmp_kernel_op_126 = tmp_kernel_op_20*(rayVertex_1 + tmp_kernel_op_121) - tmp_kernel_op_4*(rayVertex_0 + tmp_kernel_op_119);
+             const real_t tmp_kernel_op_127 = pow(tmp_kernel_op_123, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_128 = tmp_kernel_op_127*(radRayVertex + tmp_kernel_op_126*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_129 = tmp_kernel_op_121*tmp_kernel_op_124;
+             const real_t tmp_kernel_op_130 = tmp_kernel_op_127*(radRayVertex + tmp_kernel_op_126*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_131 = tmp_kernel_op_119*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_132 = abs_det_jac_affine_GRAY*0.16666666666666666*abs(-(tmp_kernel_op_120*tmp_kernel_op_130 + tmp_kernel_op_129*tmp_kernel_op_20)*(-tmp_kernel_op_122*tmp_kernel_op_128 + tmp_kernel_op_125*tmp_kernel_op_4) + (tmp_kernel_op_125*tmp_kernel_op_20 - tmp_kernel_op_130*tmp_kernel_op_131)*(tmp_kernel_op_128*tmp_kernel_op_131 + tmp_kernel_op_129*tmp_kernel_op_4));
+             const real_t tmp_kernel_op_133 = tmp_kernel_op_132*(-tmp_kernel_op_100*tmp_kernel_op_117 - tmp_kernel_op_100*tmp_kernel_op_118);
+             const real_t tmp_kernel_op_134 = tmp_kernel_op_0*tmp_kernel_op_58;
+             const real_t tmp_kernel_op_135 = tmp_kernel_op_39*(tmp_kernel_op_1 - 1.0);
+             const real_t tmp_kernel_op_136 = tmp_kernel_op_60*tmp_kernel_op_95;
+             const real_t tmp_kernel_op_137 = tmp_kernel_op_80*(tmp_kernel_op_61 - 1.0);
+             const real_t tmp_kernel_op_138 = tmp_kernel_op_132*tmp_kernel_op_97;
+             const real_t tmp_kernel_op_139 = tmp_kernel_op_117*(tmp_kernel_op_98 - 1.0);
+             const real_t tmp_kernel_op_140 = tmp_kernel_op_135*tmp_kernel_op_58;
+             const real_t tmp_kernel_op_141 = tmp_kernel_op_137*tmp_kernel_op_95;
+             const real_t tmp_kernel_op_142 = tmp_kernel_op_132*tmp_kernel_op_139;
+             const real_t tmp_kernel_op_143 = tmp_kernel_op_40*(tmp_kernel_op_2 - 1.0);
+             const real_t tmp_kernel_op_144 = tmp_kernel_op_81*(tmp_kernel_op_62 - 1.0);
+             const real_t tmp_kernel_op_145 = tmp_kernel_op_118*(tmp_kernel_op_99 - 1.0);
+             const real_t tmp_kernel_op_146 = tmp_kernel_op_143*tmp_kernel_op_58;
+             const real_t tmp_kernel_op_147 = tmp_kernel_op_144*tmp_kernel_op_95;
+             const real_t tmp_kernel_op_148 = tmp_kernel_op_132*tmp_kernel_op_145;
+             const real_t tmp_kernel_op_149 = tmp_kernel_op_1*tmp_kernel_op_40;
+             const real_t tmp_kernel_op_150 = tmp_kernel_op_2*tmp_kernel_op_39;
+             const real_t tmp_kernel_op_151 = tmp_kernel_op_58*(-tmp_kernel_op_149 - tmp_kernel_op_150);
+             const real_t tmp_kernel_op_152 = tmp_kernel_op_61*tmp_kernel_op_81;
+             const real_t tmp_kernel_op_153 = tmp_kernel_op_62*tmp_kernel_op_80;
+             const real_t tmp_kernel_op_154 = tmp_kernel_op_95*(-tmp_kernel_op_152 - tmp_kernel_op_153);
+             const real_t tmp_kernel_op_155 = tmp_kernel_op_118*tmp_kernel_op_98;
+             const real_t tmp_kernel_op_156 = tmp_kernel_op_117*tmp_kernel_op_99;
+             const real_t tmp_kernel_op_157 = tmp_kernel_op_132*(-tmp_kernel_op_155 - tmp_kernel_op_156);
+             const real_t tmp_kernel_op_158 = tmp_kernel_op_58*(tmp_kernel_op_150 - tmp_kernel_op_40*(-tmp_kernel_op_1 - 1.333333333333333));
+             const real_t tmp_kernel_op_159 = tmp_kernel_op_95*(tmp_kernel_op_153 - tmp_kernel_op_81*(-tmp_kernel_op_61 + 2.666666666666667));
+             const real_t tmp_kernel_op_160 = tmp_kernel_op_132*(-tmp_kernel_op_118*(-tmp_kernel_op_98 + 2.666666666666667) + tmp_kernel_op_156);
+             const real_t tmp_kernel_op_161 = tmp_kernel_op_58*(tmp_kernel_op_149 - tmp_kernel_op_39*(-tmp_kernel_op_2 + 2.666666666666667));
+             const real_t tmp_kernel_op_162 = tmp_kernel_op_95*(tmp_kernel_op_152 - tmp_kernel_op_80*(-tmp_kernel_op_62 - 1.333333333333333));
+             const real_t tmp_kernel_op_163 = tmp_kernel_op_132*(-tmp_kernel_op_117*(-tmp_kernel_op_99 + 2.666666666666667) + tmp_kernel_op_155);
+             const real_t elMat_0_0 = tmp_kernel_op_0*tmp_kernel_op_59 + tmp_kernel_op_133*tmp_kernel_op_97 + tmp_kernel_op_60*tmp_kernel_op_96;
+             const real_t elMat_0_1 = tmp_kernel_op_133*0.16666666666666666 + tmp_kernel_op_59*0.16666666666666666 + tmp_kernel_op_96*0.66666666666666663;
+             const real_t elMat_0_2 = tmp_kernel_op_133*0.16666666666666666 + tmp_kernel_op_59*0.66666666666666663 + tmp_kernel_op_96*0.16666666666666666;
+             const real_t elMat_1_0 = -tmp_kernel_op_134*tmp_kernel_op_135 - tmp_kernel_op_136*tmp_kernel_op_137 - tmp_kernel_op_138*tmp_kernel_op_139;
+             const real_t elMat_1_1 = tmp_kernel_op_140*-0.16666666666666666 + tmp_kernel_op_141*-0.66666666666666663 + tmp_kernel_op_142*-0.16666666666666666;
+             const real_t elMat_1_2 = tmp_kernel_op_140*-0.66666666666666663 + tmp_kernel_op_141*-0.16666666666666666 + tmp_kernel_op_142*-0.16666666666666666;
+             const real_t elMat_2_0 = -tmp_kernel_op_134*tmp_kernel_op_143 - tmp_kernel_op_136*tmp_kernel_op_144 - tmp_kernel_op_138*tmp_kernel_op_145;
+             const real_t elMat_2_1 = tmp_kernel_op_146*-0.16666666666666666 + tmp_kernel_op_147*-0.66666666666666663 + tmp_kernel_op_148*-0.16666666666666666;
+             const real_t elMat_2_2 = tmp_kernel_op_146*-0.66666666666666663 + tmp_kernel_op_147*-0.16666666666666666 + tmp_kernel_op_148*-0.16666666666666666;
+             const real_t elMat_3_0 = tmp_kernel_op_0*tmp_kernel_op_151 + tmp_kernel_op_154*tmp_kernel_op_60 + tmp_kernel_op_157*tmp_kernel_op_97;
+             const real_t elMat_3_1 = tmp_kernel_op_151*0.16666666666666666 + tmp_kernel_op_154*0.66666666666666663 + tmp_kernel_op_157*0.16666666666666666;
+             const real_t elMat_3_2 = tmp_kernel_op_151*0.66666666666666663 + tmp_kernel_op_154*0.16666666666666666 + tmp_kernel_op_157*0.16666666666666666;
+             const real_t elMat_4_0 = tmp_kernel_op_0*tmp_kernel_op_158 + tmp_kernel_op_159*tmp_kernel_op_60 + tmp_kernel_op_160*tmp_kernel_op_97;
+             const real_t elMat_4_1 = tmp_kernel_op_158*0.16666666666666666 + tmp_kernel_op_159*0.66666666666666663 + tmp_kernel_op_160*0.16666666666666666;
+             const real_t elMat_4_2 = tmp_kernel_op_158*0.66666666666666663 + tmp_kernel_op_159*0.16666666666666666 + tmp_kernel_op_160*0.16666666666666666;
+             const real_t elMat_5_0 = tmp_kernel_op_0*tmp_kernel_op_161 + tmp_kernel_op_162*tmp_kernel_op_60 + tmp_kernel_op_163*tmp_kernel_op_97;
+             const real_t elMat_5_1 = tmp_kernel_op_161*0.16666666666666666 + tmp_kernel_op_162*0.66666666666666663 + tmp_kernel_op_163*0.16666666666666666;
+             const real_t elMat_5_2 = tmp_kernel_op_161*0.66666666666666663 + tmp_kernel_op_162*0.16666666666666666 + tmp_kernel_op_163*0.16666666666666666;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 3 );
+             std::vector< real_t > _data_mat( 18 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[1] = ((uint64_t)(_data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[2] = ((uint64_t)(_data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_1_0));
+             _data_mat[4] = ((real_t)(elMat_1_1));
+             _data_mat[5] = ((real_t)(elMat_1_2));
+             _data_mat[6] = ((real_t)(elMat_2_0));
+             _data_mat[7] = ((real_t)(elMat_2_1));
+             _data_mat[8] = ((real_t)(elMat_2_2));
+             _data_mat[9] = ((real_t)(elMat_3_0));
+             _data_mat[10] = ((real_t)(elMat_3_1));
+             _data_mat[11] = ((real_t)(elMat_3_2));
+             _data_mat[12] = ((real_t)(elMat_4_0));
+             _data_mat[13] = ((real_t)(elMat_4_1));
+             _data_mat[14] = ((real_t)(elMat_4_2));
+             _data_mat[15] = ((real_t)(elMat_5_0));
+             _data_mat[16] = ((real_t)(elMat_5_1));
+             _data_mat[17] = ((real_t)(elMat_5_2));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // reciprocal of micro_edges_per_macro_edge_float; scales the macro-vertex differences below
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); // component 0 of v0 + (v1 - v0) * scale
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); // component 1 of v0 + (v1 - v0) * scale
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); // component 0 of (v2 - v0) * scale
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); // component 1 of (v2 - v0) * scale
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; // point 0 = v0 + (v1 - v0) * scale
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; // point 1 = v0 + (v2 - v0) * scale
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; // point 2 = point 0 + (v2 - v0) * scale
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; // Jacobian column 0 = point 1 - point 0
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; // Jacobian column 1 = point 2 - point 0
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; // determinant of the 2x2 affine Jacobian
+       const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); // 1/det; no guard against a zero determinant here
+       const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; // inverse Jacobian via the 2x2 adjugate formula
+       const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); // |det|; NOTE(review): assumes a floating-point abs overload (e.g. std::abs) is in scope, not the integer C abs -- confirm
+       {
+          /* FaceType.BLUE */
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t tmp_kernel_op_6 = p_affine_0_0 - p_affine_1_0;
+             const real_t tmp_kernel_op_7 = -tmp_kernel_op_6;
+             const real_t tmp_kernel_op_8 = p_affine_0_0 - p_affine_2_0;
+             const real_t tmp_kernel_op_9 = -tmp_kernel_op_8;
+             const real_t tmp_kernel_op_10 = p_affine_0_0 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_9*0.66666666666666663;
+             const real_t tmp_kernel_op_11 = (tmp_kernel_op_10*tmp_kernel_op_10);
+             const real_t tmp_kernel_op_12 = p_affine_0_1 - p_affine_1_1;
+             const real_t tmp_kernel_op_13 = -tmp_kernel_op_12;
+             const real_t tmp_kernel_op_14 = p_affine_0_1 - p_affine_2_1;
+             const real_t tmp_kernel_op_15 = -tmp_kernel_op_14;
+             const real_t tmp_kernel_op_16 = p_affine_0_1 + tmp_kernel_op_13*0.16666666666666666 + tmp_kernel_op_15*0.66666666666666663;
+             const real_t tmp_kernel_op_17 = (tmp_kernel_op_16*tmp_kernel_op_16);
+             const real_t tmp_kernel_op_18 = tmp_kernel_op_11 + tmp_kernel_op_17;
+             const real_t tmp_kernel_op_26 = pow(tmp_kernel_op_18, -0.50000000000000000)*tmp_kernel_op_25;
+             const real_t tmp_kernel_op_27 = tmp_kernel_op_10*tmp_kernel_op_26;
+             const real_t tmp_kernel_op_28 = pow(tmp_kernel_op_18, -1.5000000000000000);
+             const real_t tmp_kernel_op_31 = radRayVertex + tmp_kernel_op_24*(-tmp_kernel_op_21*(tmp_kernel_op_16 + tmp_kernel_op_29) + tmp_kernel_op_5*(tmp_kernel_op_10 + tmp_kernel_op_30));
+             const real_t tmp_kernel_op_32 = tmp_kernel_op_28*tmp_kernel_op_31*1.0;
+             const real_t tmp_kernel_op_33 = tmp_kernel_op_17*tmp_kernel_op_32 + tmp_kernel_op_27*tmp_kernel_op_5;
+             const real_t tmp_kernel_op_34 = tmp_kernel_op_16*tmp_kernel_op_26;
+             const real_t tmp_kernel_op_35 = tmp_kernel_op_10*tmp_kernel_op_16*tmp_kernel_op_28*tmp_kernel_op_31*1.0 + tmp_kernel_op_21*tmp_kernel_op_27;
+             const real_t tmp_kernel_op_36 = 1.0 / (tmp_kernel_op_33*(tmp_kernel_op_11*tmp_kernel_op_28*tmp_kernel_op_31*1.0 - tmp_kernel_op_21*tmp_kernel_op_34) + tmp_kernel_op_35*(-tmp_kernel_op_10*tmp_kernel_op_16*tmp_kernel_op_32 + tmp_kernel_op_34*tmp_kernel_op_5));
+             const real_t tmp_kernel_op_37 = tmp_kernel_op_33*tmp_kernel_op_36;
+             const real_t tmp_kernel_op_38 = tmp_kernel_op_35*tmp_kernel_op_36;
+             const real_t tmp_kernel_op_39 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_38 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_37;
+             const real_t tmp_kernel_op_40 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_38 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_37;
+             const real_t tmp_kernel_op_41 = -p_affine_0_0;
+             const real_t tmp_kernel_op_42 = tmp_kernel_op_41 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.66666666666666663;
+             const real_t tmp_kernel_op_43 = (tmp_kernel_op_42*tmp_kernel_op_42);
+             const real_t tmp_kernel_op_44 = -p_affine_0_1;
+             const real_t tmp_kernel_op_45 = tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_14*0.66666666666666663 + tmp_kernel_op_44;
+             const real_t tmp_kernel_op_46 = (tmp_kernel_op_45*tmp_kernel_op_45);
+             const real_t tmp_kernel_op_47 = tmp_kernel_op_43 + tmp_kernel_op_46;
+             const real_t tmp_kernel_op_50 = pow(tmp_kernel_op_47, -0.50000000000000000)*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_51 = tmp_kernel_op_42*tmp_kernel_op_50;
+             const real_t tmp_kernel_op_52 = tmp_kernel_op_20*(rayVertex_1 + tmp_kernel_op_45) - tmp_kernel_op_4*(rayVertex_0 + tmp_kernel_op_42);
+             const real_t tmp_kernel_op_53 = pow(tmp_kernel_op_47, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_54 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+             const real_t tmp_kernel_op_55 = tmp_kernel_op_45*tmp_kernel_op_50;
+             const real_t tmp_kernel_op_56 = tmp_kernel_op_53*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_52);
+             const real_t tmp_kernel_op_57 = tmp_kernel_op_42*tmp_kernel_op_45;
+             const real_t tmp_kernel_op_58 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_20*tmp_kernel_op_51 - tmp_kernel_op_56*tmp_kernel_op_57)*(tmp_kernel_op_4*tmp_kernel_op_55 + tmp_kernel_op_54*tmp_kernel_op_57) - (tmp_kernel_op_20*tmp_kernel_op_55 + tmp_kernel_op_43*tmp_kernel_op_56)*(tmp_kernel_op_4*tmp_kernel_op_51 - tmp_kernel_op_46*tmp_kernel_op_54));
+             const real_t tmp_kernel_op_59 = tmp_kernel_op_58*(-tmp_kernel_op_3*tmp_kernel_op_39 - tmp_kernel_op_3*tmp_kernel_op_40);
+             const real_t tmp_kernel_op_64 = p_affine_0_0 + tmp_kernel_op_7*0.66666666666666663 + tmp_kernel_op_9*0.16666666666666666;
+             const real_t tmp_kernel_op_65 = (tmp_kernel_op_64*tmp_kernel_op_64);
+             const real_t tmp_kernel_op_66 = p_affine_0_1 + tmp_kernel_op_13*0.66666666666666663 + tmp_kernel_op_15*0.16666666666666666;
+             const real_t tmp_kernel_op_67 = (tmp_kernel_op_66*tmp_kernel_op_66);
+             const real_t tmp_kernel_op_68 = tmp_kernel_op_65 + tmp_kernel_op_67;
+             const real_t tmp_kernel_op_69 = tmp_kernel_op_25*pow(tmp_kernel_op_68, -0.50000000000000000);
+             const real_t tmp_kernel_op_70 = tmp_kernel_op_64*tmp_kernel_op_69;
+             const real_t tmp_kernel_op_71 = pow(tmp_kernel_op_68, -1.5000000000000000);
+             const real_t tmp_kernel_op_72 = radRayVertex + tmp_kernel_op_24*(-tmp_kernel_op_21*(tmp_kernel_op_29 + tmp_kernel_op_66) + tmp_kernel_op_5*(tmp_kernel_op_30 + tmp_kernel_op_64));
+             const real_t tmp_kernel_op_73 = tmp_kernel_op_71*tmp_kernel_op_72*1.0;
+             const real_t tmp_kernel_op_74 = tmp_kernel_op_5*tmp_kernel_op_70 + tmp_kernel_op_67*tmp_kernel_op_73;
+             const real_t tmp_kernel_op_75 = tmp_kernel_op_66*tmp_kernel_op_69;
+             const real_t tmp_kernel_op_76 = tmp_kernel_op_21*tmp_kernel_op_70 + tmp_kernel_op_64*tmp_kernel_op_66*tmp_kernel_op_71*tmp_kernel_op_72*1.0;
+             const real_t tmp_kernel_op_77 = 1.0 / (tmp_kernel_op_74*(-tmp_kernel_op_21*tmp_kernel_op_75 + tmp_kernel_op_65*tmp_kernel_op_71*tmp_kernel_op_72*1.0) + tmp_kernel_op_76*(tmp_kernel_op_5*tmp_kernel_op_75 - tmp_kernel_op_64*tmp_kernel_op_66*tmp_kernel_op_73));
+             const real_t tmp_kernel_op_78 = tmp_kernel_op_74*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_79 = tmp_kernel_op_76*tmp_kernel_op_77;
+             const real_t tmp_kernel_op_80 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_79 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_78;
+             const real_t tmp_kernel_op_81 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_79 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_78;
+             const real_t tmp_kernel_op_82 = tmp_kernel_op_41 + tmp_kernel_op_6*0.66666666666666663 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_83 = (tmp_kernel_op_82*tmp_kernel_op_82);
+             const real_t tmp_kernel_op_84 = tmp_kernel_op_12*0.66666666666666663 + tmp_kernel_op_14*0.16666666666666666 + tmp_kernel_op_44;
+             const real_t tmp_kernel_op_85 = (tmp_kernel_op_84*tmp_kernel_op_84);
+             const real_t tmp_kernel_op_86 = tmp_kernel_op_83 + tmp_kernel_op_85;
+             const real_t tmp_kernel_op_87 = tmp_kernel_op_49*pow(tmp_kernel_op_86, -0.50000000000000000);
+             const real_t tmp_kernel_op_88 = tmp_kernel_op_82*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_89 = tmp_kernel_op_20*(rayVertex_1 + tmp_kernel_op_84) - tmp_kernel_op_4*(rayVertex_0 + tmp_kernel_op_82);
+             const real_t tmp_kernel_op_90 = pow(tmp_kernel_op_86, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_91 = tmp_kernel_op_90*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_89);
+             const real_t tmp_kernel_op_92 = tmp_kernel_op_84*tmp_kernel_op_87;
+             const real_t tmp_kernel_op_93 = tmp_kernel_op_90*(radRayVertex + tmp_kernel_op_48*tmp_kernel_op_89);
+             const real_t tmp_kernel_op_94 = tmp_kernel_op_82*tmp_kernel_op_84;
+             const real_t tmp_kernel_op_95 = abs_det_jac_affine_BLUE*0.16666666666666666*abs((tmp_kernel_op_20*tmp_kernel_op_88 - tmp_kernel_op_93*tmp_kernel_op_94)*(tmp_kernel_op_4*tmp_kernel_op_92 + tmp_kernel_op_91*tmp_kernel_op_94) - (tmp_kernel_op_20*tmp_kernel_op_92 + tmp_kernel_op_83*tmp_kernel_op_93)*(tmp_kernel_op_4*tmp_kernel_op_88 - tmp_kernel_op_85*tmp_kernel_op_91));
+             const real_t tmp_kernel_op_96 = tmp_kernel_op_95*(-tmp_kernel_op_63*tmp_kernel_op_80 - tmp_kernel_op_63*tmp_kernel_op_81);
+             const real_t tmp_kernel_op_101 = p_affine_0_0 + tmp_kernel_op_7*0.16666666666666666 + tmp_kernel_op_9*0.16666666666666666;
+             const real_t tmp_kernel_op_102 = (tmp_kernel_op_101*tmp_kernel_op_101);
+             const real_t tmp_kernel_op_103 = p_affine_0_1 + tmp_kernel_op_13*0.16666666666666666 + tmp_kernel_op_15*0.16666666666666666;
+             const real_t tmp_kernel_op_104 = (tmp_kernel_op_103*tmp_kernel_op_103);
+             const real_t tmp_kernel_op_105 = tmp_kernel_op_102 + tmp_kernel_op_104;
+             const real_t tmp_kernel_op_106 = pow(tmp_kernel_op_105, -0.50000000000000000)*tmp_kernel_op_25;
+             const real_t tmp_kernel_op_107 = tmp_kernel_op_101*tmp_kernel_op_106;
+             const real_t tmp_kernel_op_108 = pow(tmp_kernel_op_105, -1.5000000000000000);
+             const real_t tmp_kernel_op_109 = radRayVertex + tmp_kernel_op_24*(-tmp_kernel_op_21*(tmp_kernel_op_103 + tmp_kernel_op_29) + tmp_kernel_op_5*(tmp_kernel_op_101 + tmp_kernel_op_30));
+             const real_t tmp_kernel_op_110 = tmp_kernel_op_108*tmp_kernel_op_109*1.0;
+             const real_t tmp_kernel_op_111 = tmp_kernel_op_104*tmp_kernel_op_110 + tmp_kernel_op_107*tmp_kernel_op_5;
+             const real_t tmp_kernel_op_112 = tmp_kernel_op_103*tmp_kernel_op_106;
+             const real_t tmp_kernel_op_113 = tmp_kernel_op_101*tmp_kernel_op_103*tmp_kernel_op_108*tmp_kernel_op_109*1.0 + tmp_kernel_op_107*tmp_kernel_op_21;
+             const real_t tmp_kernel_op_114 = 1.0 / (tmp_kernel_op_111*(tmp_kernel_op_102*tmp_kernel_op_108*tmp_kernel_op_109*1.0 - tmp_kernel_op_112*tmp_kernel_op_21) + tmp_kernel_op_113*(-tmp_kernel_op_101*tmp_kernel_op_103*tmp_kernel_op_110 + tmp_kernel_op_112*tmp_kernel_op_5));
+             const real_t tmp_kernel_op_115 = tmp_kernel_op_111*tmp_kernel_op_114;
+             const real_t tmp_kernel_op_116 = tmp_kernel_op_113*tmp_kernel_op_114;
+             const real_t tmp_kernel_op_117 = jac_affine_inv_0_0_BLUE*tmp_kernel_op_116 + jac_affine_inv_0_1_BLUE*tmp_kernel_op_115;
+             const real_t tmp_kernel_op_118 = jac_affine_inv_1_0_BLUE*tmp_kernel_op_116 + jac_affine_inv_1_1_BLUE*tmp_kernel_op_115;
+             const real_t tmp_kernel_op_119 = tmp_kernel_op_41 + tmp_kernel_op_6*0.16666666666666666 + tmp_kernel_op_8*0.16666666666666666;
+             const real_t tmp_kernel_op_120 = (tmp_kernel_op_119*tmp_kernel_op_119);
+             const real_t tmp_kernel_op_121 = tmp_kernel_op_12*0.16666666666666666 + tmp_kernel_op_14*0.16666666666666666 + tmp_kernel_op_44;
+             const real_t tmp_kernel_op_122 = (tmp_kernel_op_121*tmp_kernel_op_121);
+             const real_t tmp_kernel_op_123 = tmp_kernel_op_120 + tmp_kernel_op_122;
+             const real_t tmp_kernel_op_124 = pow(tmp_kernel_op_123, -0.50000000000000000)*tmp_kernel_op_49;
+             const real_t tmp_kernel_op_125 = tmp_kernel_op_119*tmp_kernel_op_124;
+             const real_t tmp_kernel_op_126 = tmp_kernel_op_20*(rayVertex_1 + tmp_kernel_op_121) - tmp_kernel_op_4*(rayVertex_0 + tmp_kernel_op_119);
+             const real_t tmp_kernel_op_127 = pow(tmp_kernel_op_123, -1.5000000000000000)*1.0;
+             const real_t tmp_kernel_op_128 = tmp_kernel_op_127*(radRayVertex + tmp_kernel_op_126*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_129 = tmp_kernel_op_121*tmp_kernel_op_124;
+             const real_t tmp_kernel_op_130 = tmp_kernel_op_127*(radRayVertex + tmp_kernel_op_126*tmp_kernel_op_48);
+             const real_t tmp_kernel_op_131 = tmp_kernel_op_119*tmp_kernel_op_121;
+             const real_t tmp_kernel_op_132 = abs_det_jac_affine_BLUE*0.16666666666666666*abs(-(tmp_kernel_op_120*tmp_kernel_op_130 + tmp_kernel_op_129*tmp_kernel_op_20)*(-tmp_kernel_op_122*tmp_kernel_op_128 + tmp_kernel_op_125*tmp_kernel_op_4) + (tmp_kernel_op_125*tmp_kernel_op_20 - tmp_kernel_op_130*tmp_kernel_op_131)*(tmp_kernel_op_128*tmp_kernel_op_131 + tmp_kernel_op_129*tmp_kernel_op_4));
+             const real_t tmp_kernel_op_133 = tmp_kernel_op_132*(-tmp_kernel_op_100*tmp_kernel_op_117 - tmp_kernel_op_100*tmp_kernel_op_118);
+             const real_t tmp_kernel_op_134 = tmp_kernel_op_0*tmp_kernel_op_58;
+             const real_t tmp_kernel_op_135 = tmp_kernel_op_39*(tmp_kernel_op_1 - 1.0);
+             const real_t tmp_kernel_op_136 = tmp_kernel_op_60*tmp_kernel_op_95;
+             const real_t tmp_kernel_op_137 = tmp_kernel_op_80*(tmp_kernel_op_61 - 1.0);
+             const real_t tmp_kernel_op_138 = tmp_kernel_op_132*tmp_kernel_op_97;
+             const real_t tmp_kernel_op_139 = tmp_kernel_op_117*(tmp_kernel_op_98 - 1.0);
+             const real_t tmp_kernel_op_140 = tmp_kernel_op_135*tmp_kernel_op_58;
+             const real_t tmp_kernel_op_141 = tmp_kernel_op_137*tmp_kernel_op_95;
+             const real_t tmp_kernel_op_142 = tmp_kernel_op_132*tmp_kernel_op_139;
+             const real_t tmp_kernel_op_143 = tmp_kernel_op_40*(tmp_kernel_op_2 - 1.0);
+             const real_t tmp_kernel_op_144 = tmp_kernel_op_81*(tmp_kernel_op_62 - 1.0);
+             const real_t tmp_kernel_op_145 = tmp_kernel_op_118*(tmp_kernel_op_99 - 1.0);
+             const real_t tmp_kernel_op_146 = tmp_kernel_op_143*tmp_kernel_op_58;
+             const real_t tmp_kernel_op_147 = tmp_kernel_op_144*tmp_kernel_op_95;
+             const real_t tmp_kernel_op_148 = tmp_kernel_op_132*tmp_kernel_op_145;
+             const real_t tmp_kernel_op_149 = tmp_kernel_op_1*tmp_kernel_op_40;
+             const real_t tmp_kernel_op_150 = tmp_kernel_op_2*tmp_kernel_op_39;
+             const real_t tmp_kernel_op_151 = tmp_kernel_op_58*(-tmp_kernel_op_149 - tmp_kernel_op_150);
+             const real_t tmp_kernel_op_152 = tmp_kernel_op_61*tmp_kernel_op_81;
+             const real_t tmp_kernel_op_153 = tmp_kernel_op_62*tmp_kernel_op_80;
+             const real_t tmp_kernel_op_154 = tmp_kernel_op_95*(-tmp_kernel_op_152 - tmp_kernel_op_153);
+             const real_t tmp_kernel_op_155 = tmp_kernel_op_118*tmp_kernel_op_98;
+             const real_t tmp_kernel_op_156 = tmp_kernel_op_117*tmp_kernel_op_99;
+             const real_t tmp_kernel_op_157 = tmp_kernel_op_132*(-tmp_kernel_op_155 - tmp_kernel_op_156);
+             const real_t tmp_kernel_op_158 = tmp_kernel_op_58*(tmp_kernel_op_150 - tmp_kernel_op_40*(-tmp_kernel_op_1 - 1.333333333333333));
+             const real_t tmp_kernel_op_159 = tmp_kernel_op_95*(tmp_kernel_op_153 - tmp_kernel_op_81*(-tmp_kernel_op_61 + 2.666666666666667));
+             const real_t tmp_kernel_op_160 = tmp_kernel_op_132*(-tmp_kernel_op_118*(-tmp_kernel_op_98 + 2.666666666666667) + tmp_kernel_op_156);
+             const real_t tmp_kernel_op_161 = tmp_kernel_op_58*(tmp_kernel_op_149 - tmp_kernel_op_39*(-tmp_kernel_op_2 + 2.666666666666667));
+             const real_t tmp_kernel_op_162 = tmp_kernel_op_95*(tmp_kernel_op_152 - tmp_kernel_op_80*(-tmp_kernel_op_62 - 1.333333333333333));
+             const real_t tmp_kernel_op_163 = tmp_kernel_op_132*(-tmp_kernel_op_117*(-tmp_kernel_op_99 + 2.666666666666667) + tmp_kernel_op_155);
+             const real_t elMat_0_0 = tmp_kernel_op_0*tmp_kernel_op_59 + tmp_kernel_op_133*tmp_kernel_op_97 + tmp_kernel_op_60*tmp_kernel_op_96;
+             const real_t elMat_0_1 = tmp_kernel_op_133*0.16666666666666666 + tmp_kernel_op_59*0.16666666666666666 + tmp_kernel_op_96*0.66666666666666663;
+             const real_t elMat_0_2 = tmp_kernel_op_133*0.16666666666666666 + tmp_kernel_op_59*0.66666666666666663 + tmp_kernel_op_96*0.16666666666666666;
+             const real_t elMat_1_0 = -tmp_kernel_op_134*tmp_kernel_op_135 - tmp_kernel_op_136*tmp_kernel_op_137 - tmp_kernel_op_138*tmp_kernel_op_139;
+             const real_t elMat_1_1 = tmp_kernel_op_140*-0.16666666666666666 + tmp_kernel_op_141*-0.66666666666666663 + tmp_kernel_op_142*-0.16666666666666666;
+             const real_t elMat_1_2 = tmp_kernel_op_140*-0.66666666666666663 + tmp_kernel_op_141*-0.16666666666666666 + tmp_kernel_op_142*-0.16666666666666666;
+             const real_t elMat_2_0 = -tmp_kernel_op_134*tmp_kernel_op_143 - tmp_kernel_op_136*tmp_kernel_op_144 - tmp_kernel_op_138*tmp_kernel_op_145;
+             const real_t elMat_2_1 = tmp_kernel_op_146*-0.16666666666666666 + tmp_kernel_op_147*-0.66666666666666663 + tmp_kernel_op_148*-0.16666666666666666;
+             const real_t elMat_2_2 = tmp_kernel_op_146*-0.66666666666666663 + tmp_kernel_op_147*-0.16666666666666666 + tmp_kernel_op_148*-0.16666666666666666;
+             const real_t elMat_3_0 = tmp_kernel_op_0*tmp_kernel_op_151 + tmp_kernel_op_154*tmp_kernel_op_60 + tmp_kernel_op_157*tmp_kernel_op_97;
+             const real_t elMat_3_1 = tmp_kernel_op_151*0.16666666666666666 + tmp_kernel_op_154*0.66666666666666663 + tmp_kernel_op_157*0.16666666666666666;
+             const real_t elMat_3_2 = tmp_kernel_op_151*0.66666666666666663 + tmp_kernel_op_154*0.16666666666666666 + tmp_kernel_op_157*0.16666666666666666;
+             const real_t elMat_4_0 = tmp_kernel_op_0*tmp_kernel_op_158 + tmp_kernel_op_159*tmp_kernel_op_60 + tmp_kernel_op_160*tmp_kernel_op_97;
+             const real_t elMat_4_1 = tmp_kernel_op_158*0.16666666666666666 + tmp_kernel_op_159*0.66666666666666663 + tmp_kernel_op_160*0.16666666666666666;
+             const real_t elMat_4_2 = tmp_kernel_op_158*0.66666666666666663 + tmp_kernel_op_159*0.16666666666666666 + tmp_kernel_op_160*0.16666666666666666;
+             const real_t elMat_5_0 = tmp_kernel_op_0*tmp_kernel_op_161 + tmp_kernel_op_162*tmp_kernel_op_60 + tmp_kernel_op_163*tmp_kernel_op_97;
+             const real_t elMat_5_1 = tmp_kernel_op_161*0.16666666666666666 + tmp_kernel_op_162*0.66666666666666663 + tmp_kernel_op_163*0.16666666666666666;
+             const real_t elMat_5_2 = tmp_kernel_op_161*0.66666666666666663 + tmp_kernel_op_162*0.16666666666666666 + tmp_kernel_op_163*0.16666666666666666;
+         
+             std::vector< uint_t > _data_rowIdx( 6 );
+             std::vector< uint_t > _data_colIdx( 3 );
+             std::vector< real_t > _data_mat( 18 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]));
+             _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[1] = ((uint64_t)(_data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[2] = ((uint64_t)(_data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_1_0));
+             _data_mat[4] = ((real_t)(elMat_1_1));
+             _data_mat[5] = ((real_t)(elMat_1_2));
+             _data_mat[6] = ((real_t)(elMat_2_0));
+             _data_mat[7] = ((real_t)(elMat_2_1));
+             _data_mat[8] = ((real_t)(elMat_2_2));
+             _data_mat[9] = ((real_t)(elMat_3_0));
+             _data_mat[10] = ((real_t)(elMat_3_1));
+             _data_mat[11] = ((real_t)(elMat_3_2));
+             _data_mat[12] = ((real_t)(elMat_4_0));
+             _data_mat[13] = ((real_t)(elMat_4_1));
+             _data_mat[14] = ((real_t)(elMat_4_2));
+             _data_mat[15] = ((real_t)(elMat_5_0));
+             _data_mat[16] = ((real_t)(elMat_5_1));
+             _data_mat[17] = ((real_t)(elMat_5_2));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/k_mass/CMakeLists.txt b/operators/k_mass/CMakeLists.txt
index 35ffc3e029637fd7f8741b44702723588ac45d6c..25a69bc7bd4cda14567bd46d0ef7cf27f3fdb3dc 100644
--- a/operators/k_mass/CMakeLists.txt
+++ b/operators/k_mass/CMakeLists.txt
@@ -2,6 +2,8 @@ add_library( opgen-k_mass
 
    P1ElementwiseKMass.cpp
    P1ElementwiseKMass.hpp
+   P1ElementwiseKMassAnnulusMap.cpp
+   P1ElementwiseKMassAnnulusMap.hpp
    P1ElementwiseKMassIcosahedralShellMap.cpp
    P1ElementwiseKMassIcosahedralShellMap.hpp
 )
@@ -9,12 +11,15 @@ add_library( opgen-k_mass
 if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
    target_sources(opgen-k_mass PRIVATE
 
+      avx/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
+      avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
       avx/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
       avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
       avx/P1ElementwiseKMass_apply_macro_2D.cpp
       avx/P1ElementwiseKMass_apply_macro_3D.cpp
       avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp
       avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp
+      noarch/P1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp
       noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp
       noarch/P1ElementwiseKMass_toMatrix_macro_2D.cpp
       noarch/P1ElementwiseKMass_toMatrix_macro_3D.cpp
@@ -22,6 +27,8 @@ if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY)
 
    set_source_files_properties(
 
+      avx/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
+      avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
       avx/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
       avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
       avx/P1ElementwiseKMass_apply_macro_2D.cpp
@@ -38,6 +45,9 @@ else()
 
    target_sources(opgen-k_mass PRIVATE
 
+      noarch/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
+      noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
+      noarch/P1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp
       noarch/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp
       noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp
       noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp
diff --git a/operators/k_mass/P1ElementwiseKMassAnnulusMap.cpp b/operators/k_mass/P1ElementwiseKMassAnnulusMap.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2c31addfb6008ef446d4197b01ad9b657fffe5de
--- /dev/null
+++ b/operators/k_mass/P1ElementwiseKMassAnnulusMap.cpp
@@ -0,0 +1,366 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (maybe
+// (related to) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) causing a
+// warning in an internal standard library header (bits/stl_algobase.h). As a
+// workaround, we disable the warning and include this header indirectly through
+// a public header.
+#include <waLBerlaDefinitions.h>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnonnull"
+#endif
+#include <cmath>
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#include "P1ElementwiseKMassAnnulusMap.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+P1ElementwiseKMassAnnulusMap::P1ElementwiseKMassAnnulusMap( const std::shared_ptr< PrimitiveStorage >& storage,
+                                                            size_t                                     minLevel,
+                                                            size_t                                     maxLevel,
+                                                            const P1Function< real_t >&                _k )
+: Operator( storage, minLevel, maxLevel ) // storage and level range are handed to the Operator base
+, k( _k )                                 // coefficient function whose face data is passed to every kernel of this operator
+{}
+
+/// Applies the operator to `src` on `level` and writes the result to `dst`.
+///
+/// Only macro-faces are handled: the call aborts with "Not implemented." if the storage has
+/// global cells.
+///
+/// \param src        input function; synced between primitives (LOW2HIGH) before the kernel runs
+/// \param dst        output function; its `flag` DoFs are zeroed first if updateType == Replace
+/// \param level      refinement level to operate on
+/// \param flag       forwarded to dst.interpolate() and, as `All ^ flag`, to the additive communication
+/// \param updateType Replace zeroes `dst` up front; also forwarded to the additive communication
+void P1ElementwiseKMassAnnulusMap::apply( const P1Function< real_t >& src,
+                                          const P1Function< real_t >& dst,
+                                          uint_t                      level,
+                                          DoFType                     flag,
+                                          UpdateType                  updateType ) const
+{
+   this->startTiming( "apply" );
+
+   // Make sure that halos are up-to-date
+   this->timingTree_->start( "pre-communication" );
+   if ( this->storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( k, level, communication::syncDirection_t::LOW2HIGH );
+   }
+   this->timingTree_->stop( "pre-communication" );
+
+   if ( updateType == Replace )
+   {
+      // We need to zero the destination array (including halos), but we must not touch anything
+      // that is not flagged with the specified BCs. Hence only the DoFs selected by `flag` are
+      // zeroed here; the halos of the highest-dimensional primitives follow in the face loop
+      // below.
+      dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data in the functions
+         real_t* _data_src = face.getData( src.getFaceDataID() )->getPointer( level );
+         real_t* _data_dst = face.getData( dst.getFaceDataID() )->getPointer( level );
+         real_t* _data_k   = face.getData( k.getFaceDataID() )->getPointer( level );
+
+         // Zero out dst halos only.
+         //
+         // This is also necessary for updateType == Add: the additive communication below then
+         // skips zeroing the data on the lower-dim primitives.
+         for ( const auto& idx : vertexdof::macroface::Iterator( level ) )
+         {
+            if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) )
+            {
+               auto arrayIdx       = vertexdof::macroface::index( level, idx.x(), idx.y() );
+               _data_dst[arrayIdx] = real_t( 0 );
+            }
+         }
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+
+         // Resolve the blending map once per face; the checked pointer is reused for every
+         // parameter below instead of repeating the dynamic cast for each of them.
+         const auto annulusGeometryMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+         WALBERLA_CHECK_NOT_NULLPTR(
+             annulusGeometryMap,
+             "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         const real_t radRefVertex = annulusGeometryMap->radRefVertex();
+         const real_t radRayVertex = annulusGeometryMap->radRayVertex();
+         const real_t refVertex_0  = annulusGeometryMap->refVertex()[0];
+         const real_t rayVertex_0  = annulusGeometryMap->rayVertex()[0];
+         const real_t thrVertex_0  = annulusGeometryMap->thrVertex()[0];
+         const real_t refVertex_1  = annulusGeometryMap->refVertex()[1];
+         const real_t rayVertex_1  = annulusGeometryMap->rayVertex()[1];
+         const real_t thrVertex_1  = annulusGeometryMap->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+         apply_macro_2D(
+             _data_dst, _data_k, _data_src,
+             macro_vertex_coord_id_0comp0, macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0, macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0, macro_vertex_coord_id_2comp1,
+             micro_edges_per_macro_edge, micro_edges_per_macro_edge_float,
+             radRayVertex, radRefVertex,
+             rayVertex_0, rayVertex_1, refVertex_0, refVertex_1, thrVertex_0, thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+
+      // Push result to lower-dimensional primitives: the face-local contributions are
+      // communicated additively to the edge and vertex primitives.
+      this->timingTree_->start( "post-communication" );
+      // Note: We could avoid communication here by implementing the apply() also for the respective
+      //       lower dimensional primitives!
+      dst.communicateAdditively< Face, Edge >( level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.communicateAdditively< Face, Vertex >( level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      this->timingTree_->stop( "post-communication" );
+   }
+
+   this->stopTiming( "apply" );
+}
+/// Assembles the operator into the sparse matrix proxy `mat` on the given level.
+///
+/// Only macro-faces are handled: the call aborts with "Not implemented." if the storage has
+/// global cells.
+///
+/// \param mat   matrix proxy handed to the toMatrix_macro_2D() kernel
+/// \param src   index-valued function; its face data is passed to the kernel as `_data_src`
+/// \param dst   index-valued function; its face data is passed to the kernel as `_data_dst`
+/// \param level refinement level to operate on
+/// \param flag  currently ignored; a warning is logged on root if it is not `All`
+void P1ElementwiseKMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                                             const P1Function< idx_t >&                  src,
+                                             const P1Function< idx_t >&                  dst,
+                                             uint_t                                      level,
+                                             DoFType                                     flag ) const
+{
+   this->startTiming( "toMatrix" );
+
+   // We currently ignore the flag provided!
+   if ( flag != All )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      this->timingTree_->start( "pre-communication" );
+      k.communicate< Face, Cell >( level );
+      k.communicate< Edge, Cell >( level );
+      k.communicate< Vertex, Cell >( level );
+      this->timingTree_->stop( "pre-communication" );
+
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      this->timingTree_->start( "pre-communication" );
+      communication::syncFunctionBetweenPrimitives( k, level, communication::syncDirection_t::LOW2HIGH );
+      this->timingTree_->stop( "pre-communication" );
+
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data
+         idx_t*  _data_src = face.getData( src.getFaceDataID() )->getPointer( level );
+         idx_t*  _data_dst = face.getData( dst.getFaceDataID() )->getPointer( level );
+         real_t* _data_k   = face.getData( k.getFaceDataID() )->getPointer( level );
+
+         const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+         const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+         const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+         const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+         const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+         const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+         const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+         const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+
+         // Resolve the blending map once per face; the checked pointer is reused for every
+         // parameter below instead of repeating the dynamic cast for each of them.
+         const auto annulusGeometryMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+         WALBERLA_CHECK_NOT_NULLPTR(
+             annulusGeometryMap,
+             "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         const real_t radRefVertex = annulusGeometryMap->radRefVertex();
+         const real_t radRayVertex = annulusGeometryMap->radRayVertex();
+         const real_t refVertex_0  = annulusGeometryMap->refVertex()[0];
+         const real_t rayVertex_0  = annulusGeometryMap->rayVertex()[0];
+         const real_t thrVertex_0  = annulusGeometryMap->thrVertex()[0];
+         const real_t refVertex_1  = annulusGeometryMap->refVertex()[1];
+         const real_t rayVertex_1  = annulusGeometryMap->rayVertex()[1];
+         const real_t thrVertex_1  = annulusGeometryMap->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+         toMatrix_macro_2D(
+             _data_dst, _data_k, _data_src,
+             macro_vertex_coord_id_0comp0, macro_vertex_coord_id_0comp1,
+             macro_vertex_coord_id_1comp0, macro_vertex_coord_id_1comp1,
+             macro_vertex_coord_id_2comp0, macro_vertex_coord_id_2comp1,
+             mat,
+             micro_edges_per_macro_edge, micro_edges_per_macro_edge_float,
+             radRayVertex, radRefVertex,
+             rayVertex_0, rayVertex_1, refVertex_0, refVertex_1, thrVertex_0, thrVertex_1 );
+         this->timingTree_->stop( "kernel" );
+      }
+   }
+   this->stopTiming( "toMatrix" );
+}
+/// (Re-)computes the inverse diagonal of the operator on every level in [minLevel_, maxLevel_].
+///
+/// invDiag_ is allocated on first use. Per level it is zeroed, filled on each macro-face by
+/// computeInverseDiagonalOperatorValues_macro_2D(), communicated additively from faces to
+/// edges and vertices, and finally inverted element-wise.
+///
+/// Only macro-faces are handled: the call aborts with "Not implemented." if the storage has
+/// global cells.
+void P1ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues()
+{
+   this->startTiming( "computeInverseDiagonalOperatorValues" );
+
+   if ( invDiag_ == nullptr )
+   {
+      invDiag_ = std::make_shared< P1Function< real_t > >( "inverse diagonal entries", storage_, minLevel_, maxLevel_ );
+   }
+
+   for ( uint_t level = minLevel_; level <= maxLevel_; level++ )
+   {
+      invDiag_->setToZero( level );
+
+      if ( storage_->hasGlobalCells() )
+      {
+         this->timingTree_->start( "pre-communication" );
+         k.communicate< Face, Cell >( level );
+         k.communicate< Edge, Cell >( level );
+         k.communicate< Vertex, Cell >( level );
+         this->timingTree_->stop( "pre-communication" );
+
+         WALBERLA_ABORT( "Not implemented." );
+      }
+      else
+      {
+         this->timingTree_->start( "pre-communication" );
+         communication::syncFunctionBetweenPrimitives( k, level, communication::syncDirection_t::LOW2HIGH );
+         this->timingTree_->stop( "pre-communication" );
+
+         for ( auto& it : storage_->getFaces() )
+         {
+            Face& face = *it.second;
+
+            // get hold of the actual numerical data
+            real_t* _data_invDiag_ = face.getData( ( *invDiag_ ).getFaceDataID() )->getPointer( level );
+            real_t* _data_k        = face.getData( k.getFaceDataID() )->getPointer( level );
+
+            const auto   micro_edges_per_macro_edge       = (int64_t) levelinfo::num_microedges_per_edge( level );
+            const auto   micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level );
+            const real_t macro_vertex_coord_id_0comp0     = (real_t) face.getCoordinates()[0][0];
+            const real_t macro_vertex_coord_id_0comp1     = (real_t) face.getCoordinates()[0][1];
+            const real_t macro_vertex_coord_id_1comp0     = (real_t) face.getCoordinates()[1][0];
+            const real_t macro_vertex_coord_id_1comp1     = (real_t) face.getCoordinates()[1][1];
+            const real_t macro_vertex_coord_id_2comp0     = (real_t) face.getCoordinates()[2][0];
+            const real_t macro_vertex_coord_id_2comp1     = (real_t) face.getCoordinates()[2][1];
+
+            // Resolve the blending map once per face; the checked pointer is reused for every
+            // parameter below instead of repeating the dynamic cast for each of them.
+            const auto annulusGeometryMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+            WALBERLA_CHECK_NOT_NULLPTR(
+                annulusGeometryMap,
+                "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+            const real_t radRefVertex = annulusGeometryMap->radRefVertex();
+            const real_t radRayVertex = annulusGeometryMap->radRayVertex();
+            const real_t refVertex_0  = annulusGeometryMap->refVertex()[0];
+            const real_t rayVertex_0  = annulusGeometryMap->rayVertex()[0];
+            const real_t thrVertex_0  = annulusGeometryMap->thrVertex()[0];
+            const real_t refVertex_1  = annulusGeometryMap->refVertex()[1];
+            const real_t rayVertex_1  = annulusGeometryMap->rayVertex()[1];
+            const real_t thrVertex_1  = annulusGeometryMap->thrVertex()[1];
+
+            this->timingTree_->start( "kernel" );
+            computeInverseDiagonalOperatorValues_macro_2D(
+                _data_invDiag_, _data_k,
+                macro_vertex_coord_id_0comp0, macro_vertex_coord_id_0comp1,
+                macro_vertex_coord_id_1comp0, macro_vertex_coord_id_1comp1,
+                macro_vertex_coord_id_2comp0, macro_vertex_coord_id_2comp1,
+                micro_edges_per_macro_edge, micro_edges_per_macro_edge_float,
+                radRayVertex, radRefVertex,
+                rayVertex_0, rayVertex_1, refVertex_0, refVertex_1, thrVertex_0, thrVertex_1 );
+            this->timingTree_->stop( "kernel" );
+         }
+
+         // Push result to lower-dimensional primitives: the face-local contributions are
+         // communicated additively to the edge and vertex primitives.
+         this->timingTree_->start( "post-communication" );
+         // Note: We could avoid communication here by implementing the apply() also for the respective
+         //       lower dimensional primitives!
+         ( *invDiag_ ).communicateAdditively< Face, Edge >( level );
+         ( *invDiag_ ).communicateAdditively< Face, Vertex >( level );
+         this->timingTree_->stop( "post-communication" );
+      }
+
+      // Everything written above is the diagonal itself; invert it entry by entry on this level.
+      ( *invDiag_ ).invertElementwise( level );
+   }
+
+   this->stopTiming( "computeInverseDiagonalOperatorValues" );
+}
+/// Returns invDiag_, the function that computeInverseDiagonalOperatorValues() creates and fills.
+/// NOTE(review): presumably still null if that method has not been called - confirm in the header.
+std::shared_ptr< P1Function< real_t > > P1ElementwiseKMassAnnulusMap::getInverseDiagonalValues() const
+{ return this->invDiag_; }
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/k_mass/P1ElementwiseKMassAnnulusMap.hpp b/operators/k_mass/P1ElementwiseKMassAnnulusMap.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..37c33555afb8396cf2325c8d1f8f1016298e056f
--- /dev/null
+++ b/operators/k_mass/P1ElementwiseKMassAnnulusMap.hpp
@@ -0,0 +1,168 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+#include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/communication/Syncing.hpp"
+#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
+#include "hyteg/geometry/AnnulusMap.hpp"
+#include "hyteg/operators/Operator.hpp"
+#include "hyteg/p1functionspace/P1Function.hpp"
+#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
+#include "hyteg/solvers/Smoothables.hpp"
+#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+/// Mass operator with a scalar coefficient.
+///
+/// Geometry map: AnnulusMap
+///
+/// Weak formulation
+///
+///     u: trial function (space: Lagrange, degree: 1)
+///     v: test function  (space: Lagrange, degree: 1)
+///     k: coefficient    (space: Lagrange, degree: 1)
+///
+///     ∫ k uv
+
+class P1ElementwiseKMassAnnulusMap : public Operator< P1Function< real_t >, P1Function< real_t > >,
+                                     public OperatorWithInverseDiagonal< P1Function< real_t > > // adds the inverse-diagonal interface declared below
+{
+ public:
+   P1ElementwiseKMassAnnulusMap( const std::shared_ptr< PrimitiveStorage >& storage,
+                                 size_t                                     minLevel,
+                                 size_t                                     maxLevel,
+                                 const P1Function< real_t >&                _k ); // _k: P1 coefficient; presumably kept in member k (ctor body not shown here)
+
+   void apply( const P1Function< real_t >& src,
+               const P1Function< real_t >& dst, // NOTE(review): presumably the output although const-qualified; confirm P1Function has handle semantics
+               uint_t                      level,
+               DoFType                     flag,
+               UpdateType                  updateType = Replace ) const; // updateType is Replace when the caller omits it
+
+   void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                  const P1Function< idx_t >&                  src, // idx_t-valued (not real_t) functions here;
+                  const P1Function< idx_t >&                  dst, // presumably DoF numberings for matrix columns/rows - TODO confirm
+                  uint_t                                      level,
+                  DoFType                                     flag ) const;
+
+   void computeInverseDiagonalOperatorValues(); // non-const: writes invDiag_ and finishes with invDiag_->invertElementwise( level )
+
+   std::shared_ptr< P1Function< real_t > > getInverseDiagonalValues() const; // returns the invDiag_ handle
+
+ protected: // no protected members are declared
+ private:
+   /// Kernel type: apply
+   /// - quadrature rule: Dunavant 4 | points: 6, degree: 4
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    251     273      18      12      6              0                 0              1
+   void apply_macro_2D( real_t* RESTRICT _data_dst,
+                        real_t* RESTRICT _data_k, // coefficient DoFs; the AVX kernel indexes them exactly like _data_src
+                        real_t* RESTRICT _data_src,
+                        real_t           macro_vertex_coord_id_0comp0, // macro_vertex_coord_id_<v>comp<c>: component c of macro vertex v
+                        real_t           macro_vertex_coord_id_0comp1,
+                        real_t           macro_vertex_coord_id_1comp0,
+                        real_t           macro_vertex_coord_id_1comp1,
+                        real_t           macro_vertex_coord_id_2comp0,
+                        real_t           macro_vertex_coord_id_2comp1,
+                        int64_t          micro_edges_per_macro_edge, // upper bound of the kernel's row loop
+                        real_t           micro_edges_per_macro_edge_float, // used as divisor (1.0 / value) when scaling to micro elements
+                        real_t           radRayVertex, // radRayVertex .. thrVertex_1: AnnulusMap blending parameters
+                        real_t           radRefVertex,
+                        real_t           rayVertex_0,
+                        real_t           rayVertex_1,
+                        real_t           refVertex_0,
+                        real_t           refVertex_1,
+                        real_t           thrVertex_0,
+                        real_t           thrVertex_1 ) const;
+   /// Kernel type: toMatrix
+   /// - quadrature rule: Dunavant 4 | points: 6, degree: 4
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    242     264      18      12      6              0                 0              4
+   void toMatrix_macro_2D( idx_t* RESTRICT                      _data_dst, // idx_t data here, unlike the real_t buffers of apply_macro_2D
+                           real_t* RESTRICT                     _data_k,
+                           idx_t* RESTRICT                      _data_src,
+                           real_t                               macro_vertex_coord_id_0comp0,
+                           real_t                               macro_vertex_coord_id_0comp1,
+                           real_t                               macro_vertex_coord_id_1comp0,
+                           real_t                               macro_vertex_coord_id_1comp1,
+                           real_t                               macro_vertex_coord_id_2comp0,
+                           real_t                               macro_vertex_coord_id_2comp1,
+                           std::shared_ptr< SparseMatrixProxy > mat, // extra argument compared with apply_macro_2D; presumably receives the entries - TODO confirm
+                           int64_t                              micro_edges_per_macro_edge,
+                           real_t                               micro_edges_per_macro_edge_float,
+                           real_t                               radRayVertex,
+                           real_t                               radRefVertex,
+                           real_t                               rayVertex_0,
+                           real_t                               rayVertex_1,
+                           real_t                               refVertex_0,
+                           real_t                               refVertex_1,
+                           real_t                               thrVertex_0,
+                           real_t                               thrVertex_1 ) const;
+   /// Kernel type: computeInverseDiagonalOperatorValues
+   /// - quadrature rule: Dunavant 4 | points: 6, degree: 4
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    227     246      18      12      6              0                 0              1
+   void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_, // no src buffer in this kernel, only invDiag_ and k data
+                                                       real_t* RESTRICT _data_k,
+                                                       real_t           macro_vertex_coord_id_0comp0,
+                                                       real_t           macro_vertex_coord_id_0comp1,
+                                                       real_t           macro_vertex_coord_id_1comp0,
+                                                       real_t           macro_vertex_coord_id_1comp1,
+                                                       real_t           macro_vertex_coord_id_2comp0,
+                                                       real_t           macro_vertex_coord_id_2comp1,
+                                                       int64_t          micro_edges_per_macro_edge,
+                                                       real_t           micro_edges_per_macro_edge_float,
+                                                       real_t           radRayVertex,
+                                                       real_t           radRefVertex,
+                                                       real_t           rayVertex_0,
+                                                       real_t           rayVertex_1,
+                                                       real_t           refVertex_0,
+                                                       real_t           refVertex_1,
+                                                       real_t           thrVertex_0,
+                                                       real_t           thrVertex_1 ) const; // caller reads thrVertex_1 from AnnulusMap::thrVertex()[1] of the face's geometry map
+
+   std::shared_ptr< P1Function< real_t > > invDiag_; // inverse diagonal; inverted elementwise in computeInverseDiagonalOperatorValues()
+   P1Function< real_t >                    k; // coefficient k of the weak form; held by value, not by reference
+};
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp b/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c891b3871712d52e4fbe14d6118fed139291eac4
--- /dev/null
+++ b/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
@@ -0,0 +1,397 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P1ElementwiseKMassAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P1ElementwiseKMassAnnulusMap::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
+   
+       const real_t _data_q_p_0 [] = {0.44594849091596489, 0.091576213509770715, 0.10810301816807022, 0.81684757298045851, 0.44594849091596489, 0.091576213509770715};
+   
+       const real_t _data_q_p_1 [] = {0.10810301816807022, 0.81684757298045851, 0.44594849091596489, 0.091576213509770715, 0.44594849091596489, 0.091576213509770715};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_6 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_7 = (radRayVertex - radRefVertex)*1.0 / (tmp_qloop_0*(rayVertex_0 - refVertex_0) - tmp_qloop_6*(rayVertex_1 - refVertex_1));
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_psi_jac_affine_det_0_0_GRAY [] = {((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.01168626253704612)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871174)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871157)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196505)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196491)), ((real_t)(abs_det_jac_affine_GRAY*0.6672399574840655)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.01168626253704612)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871226)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196532)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871174)), ((real_t)(abs_det_jac_affine_GRAY*0.6672399574840655)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196491)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_GRAY*0.01168626253704612)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.6672399574840655)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196491)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196491)), 
((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d k_dof_0 = _mm256_loadu_pd(& _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d k_dof_1 = _mm256_loadu_pd(& _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d k_dof_2 = _mm256_loadu_pd(& _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 6; q += 1)
+                {
+                   const __m256d tmp_qloop_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_1);
+                   const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3);
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4);
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7));
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_3),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)));
+                   const __m256d tmp_qloop_11 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_12 = _mm256_mul_pd(tmp_qloop_11,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_8);
+                   const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_11,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_15 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_3);
+                   const __m256d tmp_qloop_16 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(k_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))),_mm256_mul_pd(k_dof_1,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(k_dof_2,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(tmp_qloop_12,tmp_qloop_15)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_15),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6)))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6)),_mm256_mul_pd(tmp_qloop_14,tmp_qloop_2)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_12,tmp_qloop_4),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))))));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_16,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_GRAY[6*q],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q]));
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_16,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 1],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 1],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 1],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 1]));
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_16,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 2],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 2],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 2],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 2]));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_16,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 3],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 3],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 3],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 3]));
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_16,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 4],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 4],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 4],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 4]));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_16,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 5],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 5],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 5],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 5]));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_1,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_2,src_dof_0),_mm256_mul_pd(q_acc_1_2,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t src_dof_1 = _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_2 = _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t k_dof_0 = _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t k_dof_1 = _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t k_dof_2 = _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                for (int64_t q = 0; q < 6; q += 1)
+                {
+                   const real_t tmp_qloop_1 = -p_affine_0_0 + (p_affine_0_0 - p_affine_1_0)*_data_q_p_0[q] + (p_affine_0_0 - p_affine_2_0)*_data_q_p_1[q];
+                   const real_t tmp_qloop_2 = (tmp_qloop_1*tmp_qloop_1);
+                   const real_t tmp_qloop_3 = -p_affine_0_1 + (p_affine_0_1 - p_affine_1_1)*_data_q_p_0[q] + (p_affine_0_1 - p_affine_2_1)*_data_q_p_1[q];
+                   const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                   const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                   const real_t tmp_qloop_8 = pow(tmp_qloop_5, -0.50000000000000000)*tmp_qloop_7*1.0;
+                   const real_t tmp_qloop_9 = tmp_qloop_1*tmp_qloop_8;
+                   const real_t tmp_qloop_10 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_1) + tmp_qloop_6*(rayVertex_1 + tmp_qloop_3);
+                   const real_t tmp_qloop_11 = pow(tmp_qloop_5, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_12 = tmp_qloop_11*(radRayVertex + tmp_qloop_10*tmp_qloop_7);
+                   const real_t tmp_qloop_13 = tmp_qloop_3*tmp_qloop_8;
+                   const real_t tmp_qloop_14 = tmp_qloop_11*(radRayVertex + tmp_qloop_10*tmp_qloop_7);
+                   const real_t tmp_qloop_15 = tmp_qloop_1*tmp_qloop_3;
+                   const real_t tmp_qloop_16 = (k_dof_0*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]) + k_dof_1*_data_q_p_0[q] + k_dof_2*_data_q_p_1[q])*abs((tmp_qloop_0*tmp_qloop_13 + tmp_qloop_12*tmp_qloop_15)*(tmp_qloop_14*tmp_qloop_15 - tmp_qloop_6*tmp_qloop_9) + (tmp_qloop_0*tmp_qloop_9 - tmp_qloop_12*tmp_qloop_4)*(tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14*tmp_qloop_2))*_data_q_w[q];
+                   const real_t q_tmp_0_0 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q];
+                   const real_t q_tmp_0_1 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 1];
+                   const real_t q_tmp_0_2 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 2];
+                   const real_t q_tmp_1_1 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 3];
+                   const real_t q_tmp_1_2 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 4];
+                   const real_t q_tmp_2_2 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 5];
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2;
+                const real_t elMatVec_1 = q_acc_0_1*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2;
+                const real_t elMatVec_2 = q_acc_0_2*src_dof_0 + q_acc_1_2*src_dof_1 + q_acc_2_2*src_dof_2;
+                _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_psi_jac_affine_det_0_0_BLUE [] = {((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.01168626253704612)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871174)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871157)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196505)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196491)), ((real_t)(abs_det_jac_affine_BLUE*0.6672399574840655)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.01168626253704612)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871226)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196532)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871174)), ((real_t)(abs_det_jac_affine_BLUE*0.6672399574840655)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196491)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_BLUE*0.01168626253704612)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.6672399574840655)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196491)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196491)), 
((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                const __m256d k_dof_0 = _mm256_loadu_pd(& _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d k_dof_1 = _mm256_loadu_pd(& _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d k_dof_2 = _mm256_loadu_pd(& _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 6; q += 1)
+                {
+                   const __m256d tmp_qloop_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_1);
+                   const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3);
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4);
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7));
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_3),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)));
+                   const __m256d tmp_qloop_11 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_12 = _mm256_mul_pd(tmp_qloop_11,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_8);
+                   const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_11,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_15 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_3);
+                   const __m256d tmp_qloop_16 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(k_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))),_mm256_mul_pd(k_dof_1,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(k_dof_2,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(tmp_qloop_12,tmp_qloop_15)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_15),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6)))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6)),_mm256_mul_pd(tmp_qloop_14,tmp_qloop_2)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_12,tmp_qloop_4),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))))));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_16,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_BLUE[6*q],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q]));
+                   const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_16,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 1],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 1],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 1],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 1]));
+                   const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_16,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 2],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 2],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 2],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 2]));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_16,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 3],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 3],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 3],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 3]));
+                   const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_16,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 4],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 4],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 4],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 4]));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_16,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 5],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 5],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 5],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 5]));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1);
+                   q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                }
+                const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2));
+                const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_1,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2));
+                const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_2,src_dof_0),_mm256_mul_pd(q_acc_1_2,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t src_dof_0 = _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t src_dof_1 = _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t src_dof_2 = _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                const real_t k_dof_0 = _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t k_dof_1 = _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t k_dof_2 = _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_0_1 = 0.0;
+                real_t q_acc_0_2 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_1_2 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                for (int64_t q = 0; q < 6; q += 1)
+                {
+                   const real_t tmp_qloop_1 = -p_affine_0_0 + (p_affine_0_0 - p_affine_1_0)*_data_q_p_0[q] + (p_affine_0_0 - p_affine_2_0)*_data_q_p_1[q];
+                   const real_t tmp_qloop_2 = (tmp_qloop_1*tmp_qloop_1);
+                   const real_t tmp_qloop_3 = -p_affine_0_1 + (p_affine_0_1 - p_affine_1_1)*_data_q_p_0[q] + (p_affine_0_1 - p_affine_2_1)*_data_q_p_1[q];
+                   const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                   const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                   const real_t tmp_qloop_8 = pow(tmp_qloop_5, -0.50000000000000000)*tmp_qloop_7*1.0;
+                   const real_t tmp_qloop_9 = tmp_qloop_1*tmp_qloop_8;
+                   const real_t tmp_qloop_10 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_1) + tmp_qloop_6*(rayVertex_1 + tmp_qloop_3);
+                   const real_t tmp_qloop_11 = pow(tmp_qloop_5, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_12 = tmp_qloop_11*(radRayVertex + tmp_qloop_10*tmp_qloop_7);
+                   const real_t tmp_qloop_13 = tmp_qloop_3*tmp_qloop_8;
+                   const real_t tmp_qloop_14 = tmp_qloop_11*(radRayVertex + tmp_qloop_10*tmp_qloop_7);
+                   const real_t tmp_qloop_15 = tmp_qloop_1*tmp_qloop_3;
+                   const real_t tmp_qloop_16 = (k_dof_0*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]) + k_dof_1*_data_q_p_0[q] + k_dof_2*_data_q_p_1[q])*abs((tmp_qloop_0*tmp_qloop_13 + tmp_qloop_12*tmp_qloop_15)*(tmp_qloop_14*tmp_qloop_15 - tmp_qloop_6*tmp_qloop_9) + (tmp_qloop_0*tmp_qloop_9 - tmp_qloop_12*tmp_qloop_4)*(tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14*tmp_qloop_2))*_data_q_w[q];
+                   const real_t q_tmp_0_0 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q];
+                   const real_t q_tmp_0_1 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 1];
+                   const real_t q_tmp_0_2 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 2];
+                   const real_t q_tmp_1_1 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 3];
+                   const real_t q_tmp_1_2 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 4];
+                   const real_t q_tmp_2_2 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 5];
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                   q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                }
+                const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2;
+                const real_t elMatVec_1 = q_acc_0_1*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2;
+                const real_t elMatVec_2 = q_acc_0_2*src_dof_0 + q_acc_1_2*src_dof_1 + q_acc_2_2*src_dof_2;
+                _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..63a79e1220dca7f8646073f6905250632b8f4778
--- /dev/null
+++ b/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
@@ -0,0 +1,349 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P1ElementwiseKMassAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P1ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
+   
+       const real_t _data_q_p_0 [] = {0.44594849091596489, 0.091576213509770715, 0.10810301816807022, 0.81684757298045851, 0.44594849091596489, 0.091576213509770715};
+   
+       const real_t _data_q_p_1 [] = {0.10810301816807022, 0.81684757298045851, 0.44594849091596489, 0.091576213509770715, 0.44594849091596489, 0.091576213509770715};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_6 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_7 = (radRayVertex - radRefVertex)*1.0 / (tmp_qloop_0*(rayVertex_0 - refVertex_0) - tmp_qloop_6*(rayVertex_1 - refVertex_1));
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_psi_jac_affine_det_0_0_GRAY [] = {((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.01168626253704612)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871174)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871157)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196505)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196491)), ((real_t)(abs_det_jac_affine_GRAY*0.6672399574840655)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.01168626253704612)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871226)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196532)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871174)), ((real_t)(abs_det_jac_affine_GRAY*0.6672399574840655)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196491)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_GRAY*0.01168626253704612)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.6672399574840655)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196491)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196491)), 
((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d k_dof_0 = _mm256_loadu_pd(& _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]);
+                const __m256d k_dof_1 = _mm256_loadu_pd(& _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d k_dof_2 = _mm256_loadu_pd(& _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 6; q += 1)
+                {
+                   const __m256d tmp_qloop_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_1);
+                   const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3);
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4);
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7));
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_3),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)));
+                   const __m256d tmp_qloop_11 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_12 = _mm256_mul_pd(tmp_qloop_11,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_8);
+                   const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_11,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_15 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_3);
+                   const __m256d tmp_qloop_16 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(k_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))),_mm256_mul_pd(k_dof_1,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(k_dof_2,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(tmp_qloop_12,tmp_qloop_15)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_15),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6)))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6)),_mm256_mul_pd(tmp_qloop_14,tmp_qloop_2)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_12,tmp_qloop_4),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))))));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_16,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_GRAY[6*q],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q]));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_16,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 3],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 3],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 3],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 3]));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_16,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 5],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 5],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 5],_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 5]));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                }
+                const __m256d elMatDiag_0 = q_acc_0_0;
+                const __m256d elMatDiag_1 = q_acc_1_1;
+                const __m256d elMatDiag_2 = q_acc_2_2;
+                _mm256_storeu_pd(&_data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t k_dof_0 = _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                const real_t k_dof_1 = _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t k_dof_2 = _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                for (int64_t q = 0; q < 6; q += 1)
+                {
+                   const real_t tmp_qloop_1 = -p_affine_0_0 + (p_affine_0_0 - p_affine_1_0)*_data_q_p_0[q] + (p_affine_0_0 - p_affine_2_0)*_data_q_p_1[q];
+                   const real_t tmp_qloop_2 = (tmp_qloop_1*tmp_qloop_1);
+                   const real_t tmp_qloop_3 = -p_affine_0_1 + (p_affine_0_1 - p_affine_1_1)*_data_q_p_0[q] + (p_affine_0_1 - p_affine_2_1)*_data_q_p_1[q];
+                   const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                   const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                   const real_t tmp_qloop_8 = pow(tmp_qloop_5, -0.50000000000000000)*tmp_qloop_7*1.0;
+                   const real_t tmp_qloop_9 = tmp_qloop_1*tmp_qloop_8;
+                   const real_t tmp_qloop_10 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_1) + tmp_qloop_6*(rayVertex_1 + tmp_qloop_3);
+                   const real_t tmp_qloop_11 = pow(tmp_qloop_5, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_12 = tmp_qloop_11*(radRayVertex + tmp_qloop_10*tmp_qloop_7);
+                   const real_t tmp_qloop_13 = tmp_qloop_3*tmp_qloop_8;
+                   const real_t tmp_qloop_14 = tmp_qloop_11*(radRayVertex + tmp_qloop_10*tmp_qloop_7);
+                   const real_t tmp_qloop_15 = tmp_qloop_1*tmp_qloop_3;
+                   const real_t tmp_qloop_16 = (k_dof_0*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]) + k_dof_1*_data_q_p_0[q] + k_dof_2*_data_q_p_1[q])*abs((tmp_qloop_0*tmp_qloop_13 + tmp_qloop_12*tmp_qloop_15)*(tmp_qloop_14*tmp_qloop_15 - tmp_qloop_6*tmp_qloop_9) + (tmp_qloop_0*tmp_qloop_9 - tmp_qloop_12*tmp_qloop_4)*(tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14*tmp_qloop_2))*_data_q_w[q];
+                   const real_t q_tmp_0_0 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q];
+                   const real_t q_tmp_1_1 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 3];
+                   const real_t q_tmp_2_2 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 5];
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                }
+                const real_t elMatDiag_0 = q_acc_0_0;
+                const real_t elMatDiag_1 = q_acc_1_1;
+                const real_t elMatDiag_2 = q_acc_2_2;
+                _data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+                _data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             }
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_psi_jac_affine_det_0_0_BLUE [] = {((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.01168626253704612)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871174)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871157)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196505)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196491)), ((real_t)(abs_det_jac_affine_BLUE*0.6672399574840655)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.01168626253704612)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871226)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196532)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871174)), ((real_t)(abs_det_jac_affine_BLUE*0.6672399574840655)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196491)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_BLUE*0.01168626253704612)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.6672399574840655)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196491)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196491)), 
((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          {
+             for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0));
+                const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1));
+                const __m256d k_dof_0 = _mm256_loadu_pd(& _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]);
+                const __m256d k_dof_1 = _mm256_loadu_pd(& _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]);
+                const __m256d k_dof_2 = _mm256_loadu_pd(& _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]);
+                __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0);
+                for (int64_t q = 0; q < 6; q += 1)
+                {
+                   const __m256d tmp_qloop_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_1_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_2_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_1);
+                   const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_1_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_2_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_0_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)));
+                   const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3);
+                   const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4);
+                   const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7));
+                   const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_8);
+                   const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1),tmp_qloop_3),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0),tmp_qloop_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)));
+                   const __m256d tmp_qloop_11 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5)),_mm256_set_pd(1.0,1.0,1.0,1.0));
+                   const __m256d tmp_qloop_12 = _mm256_mul_pd(tmp_qloop_11,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_8);
+                   const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_11,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)));
+                   const __m256d tmp_qloop_15 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_3);
+                   const __m256d tmp_qloop_16 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(k_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))),_mm256_mul_pd(k_dof_1,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_mul_pd(k_dof_2,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_castsi256_pd(_mm256_and_si256(_mm256_set1_epi64x(0x7fffffffffffffff), _mm256_castpd_si256(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(tmp_qloop_12,tmp_qloop_15)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_15),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6)))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6)),_mm256_mul_pd(tmp_qloop_14,tmp_qloop_2)),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(tmp_qloop_0,tmp_qloop_0,tmp_qloop_0,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_12,tmp_qloop_4),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))))))));
+                   const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_16,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_BLUE[6*q],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q]));
+                   const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_16,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 3],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 3],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 3],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 3]));
+                   const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_16,_mm256_set_pd(_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 5],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 5],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 5],_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 5]));
+                   q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0);
+                   q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1);
+                   q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2);
+                }
+                const __m256d elMatDiag_0 = q_acc_0_0;
+                const __m256d elMatDiag_1 = q_acc_1_1;
+                const __m256d elMatDiag_2 = q_acc_2_2;
+                _mm256_storeu_pd(&_data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])));
+                _mm256_storeu_pd(&_data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])));
+                _mm256_storeu_pd(&_data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])));
+             }
+             for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+             {
+            
+                const int64_t phantom_ctr_0 = ctr_0;
+                real_t _data_float_loop_ctr_array_dim_0[4];
+                _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+                _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+                _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+                _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+                real_t _data_float_loop_ctr_array_dim_1[4];
+                _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+                _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+            
+                const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+                const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+                const real_t k_dof_0 = _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                const real_t k_dof_1 = _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                const real_t k_dof_2 = _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+                real_t q_acc_0_0 = 0.0;
+                real_t q_acc_1_1 = 0.0;
+                real_t q_acc_2_2 = 0.0;
+                for (int64_t q = 0; q < 6; q += 1)
+                {
+                   const real_t tmp_qloop_1 = -p_affine_0_0 + (p_affine_0_0 - p_affine_1_0)*_data_q_p_0[q] + (p_affine_0_0 - p_affine_2_0)*_data_q_p_1[q];
+                   const real_t tmp_qloop_2 = (tmp_qloop_1*tmp_qloop_1);
+                   const real_t tmp_qloop_3 = -p_affine_0_1 + (p_affine_0_1 - p_affine_1_1)*_data_q_p_0[q] + (p_affine_0_1 - p_affine_2_1)*_data_q_p_1[q];
+                   const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                   const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                   const real_t tmp_qloop_8 = pow(tmp_qloop_5, -0.50000000000000000)*tmp_qloop_7*1.0;
+                   const real_t tmp_qloop_9 = tmp_qloop_1*tmp_qloop_8;
+                   const real_t tmp_qloop_10 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_1) + tmp_qloop_6*(rayVertex_1 + tmp_qloop_3);
+                   const real_t tmp_qloop_11 = pow(tmp_qloop_5, -1.5000000000000000)*1.0;
+                   const real_t tmp_qloop_12 = tmp_qloop_11*(radRayVertex + tmp_qloop_10*tmp_qloop_7);
+                   const real_t tmp_qloop_13 = tmp_qloop_3*tmp_qloop_8;
+                   const real_t tmp_qloop_14 = tmp_qloop_11*(radRayVertex + tmp_qloop_10*tmp_qloop_7);
+                   const real_t tmp_qloop_15 = tmp_qloop_1*tmp_qloop_3;
+                   const real_t tmp_qloop_16 = (k_dof_0*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]) + k_dof_1*_data_q_p_0[q] + k_dof_2*_data_q_p_1[q])*abs((tmp_qloop_0*tmp_qloop_13 + tmp_qloop_12*tmp_qloop_15)*(tmp_qloop_14*tmp_qloop_15 - tmp_qloop_6*tmp_qloop_9) + (tmp_qloop_0*tmp_qloop_9 - tmp_qloop_12*tmp_qloop_4)*(tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14*tmp_qloop_2))*_data_q_w[q];
+                   const real_t q_tmp_0_0 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q];
+                   const real_t q_tmp_1_1 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 3];
+                   const real_t q_tmp_2_2 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 5];
+                   q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                   q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                   q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+                }
+                const real_t elMatDiag_0 = q_acc_0_0;
+                const real_t elMatDiag_1 = q_acc_1_1;
+                const real_t elMatDiag_2 = q_acc_2_2;
+                _data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+                _data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+                _data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             }
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3757f7e0b67ac87ca4b2136d0e76534dd5b38339
--- /dev/null
+++ b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp
@@ -0,0 +1,255 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P1ElementwiseKMassAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P1ElementwiseKMassAnnulusMap::apply_macro_2D( real_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, real_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{ // Adds to _data_dst the product of each micro-element's symmetric 3x3 local matrix with the matching _data_src values, first for all GRAY and then for all BLUE micro-elements; _data_k supplies a per-vertex coefficient that scales the integrand.
+    {
+       const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; // quadrature weights, one per quadrature point q (6 points)
+   
+       const real_t _data_q_p_0 [] = {0.44594849091596489, 0.091576213509770715, 0.10810301816807022, 0.81684757298045851, 0.44594849091596489, 0.091576213509770715}; // first reference coordinate of each quadrature point
+   
+       const real_t _data_q_p_1 [] = {0.10810301816807022, 0.81684757298045851, 0.44594849091596489, 0.091576213509770715, 0.44594849091596489, 0.091576213509770715}; // second reference coordinate of each quadrature point
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // reciprocal of the number of micro edges per macro edge
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; // vertex 0 of the GRAY micro-element anchored at macro vertex 0 (component 0)
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); // vertex 1: one micro step from macro vertex 0 towards macro vertex 1
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); // vertex 2: one micro step from macro vertex 0 towards macro vertex 2
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; // 2x2 affine Jacobian: columns are the edge vectors (vertex 1 - vertex 0) and (vertex 2 - vertex 0)
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY); // computed once and reused for every GRAY element. NOTE(review): unqualified abs on real_t — confirm a floating-point overload (not ::abs(int)) is selected
+       const real_t tmp_qloop_0 = rayVertex_1 - thrVertex_1; // loop-invariant terms built only from the ray/ref/thr vertex arguments (presumably annulus-map blending parameters — TODO confirm)
+       const real_t tmp_qloop_6 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_7 = (radRayVertex - radRefVertex)*1.0 / (tmp_qloop_0*(rayVertex_0 - refVertex_0) - tmp_qloop_6*(rayVertex_1 - refVertex_1)); // NOTE(review): denominator is not checked against zero — presumably non-zero for valid input, confirm against caller
+       {
+          /* FaceType.GRAY: micro-elements with vertices (ctr_0, ctr_1), (ctr_0 + 1, ctr_1), (ctr_0, ctr_1 + 1) */
+          const real_t _data_phi_psi_jac_affine_det_0_0_GRAY [] = {((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.01168626253704612)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871174)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871157)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196505)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196491)), ((real_t)(abs_det_jac_affine_GRAY*0.6672399574840655)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.01168626253704612)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871226)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196532)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871174)), ((real_t)(abs_det_jac_affine_GRAY*0.6672399574840655)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196491)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_GRAY*0.01168626253704612)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.6672399574840655)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196491)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196491)), 
((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122))}; // 6 entries per quadrature point, read below as local-matrix entries 00, 01, 02, 11, 12, 22; each is pre-multiplied by abs_det_jac_affine_GRAY
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) // rows of micro-elements
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) // row ctr_1 holds (micro_edges_per_macro_edge - ctr_1) GRAY elements
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0; // equals ctr_0, so every [ctr_0 - phantom_ctr_0] lookup below reads element 0
+             real_t _data_float_loop_ctr_array_dim_0[4]; // loop counter ctr_0 (plus offsets 0..3) converted to real_t; only element 0 is read in this scalar kernel
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4]; // loop counter ctr_1 converted to real_t; only element 0 is read
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; // p_affine_i_c: component c of micro-element vertex i, interpolated from the macro vertices; vertex 0 sits at grid position (ctr_0, ctr_1)
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; // vertex 1 at grid position (ctr_0 + 1, ctr_1)
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; // vertex 2 at grid position (ctr_0, ctr_1 + 1)
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // linear index of grid vertex (ctr_0, ctr_1): rows are stored back to back, row r having (micro_edges_per_macro_edge + 1 - r) entries
+             const real_t src_dof_1 = _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t src_dof_2 = _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t k_dof_0 = _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // coefficient k at the same three vertices as the src values
+             const real_t k_dof_1 = _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t k_dof_2 = _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             real_t q_acc_0_0 = 0.0; // accumulators for the upper triangle of the symmetric 3x3 local matrix
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             for (int64_t q = 0; q < 6; q += 1) // loop over the 6 quadrature points
+             {
+                const real_t tmp_qloop_1 = -p_affine_0_0 + (p_affine_0_0 - p_affine_1_0)*_data_q_p_0[q] + (p_affine_0_0 - p_affine_2_0)*_data_q_p_1[q]; // negated component 0 of the quadrature point mapped affinely onto the micro-element
+                const real_t tmp_qloop_2 = (tmp_qloop_1*tmp_qloop_1);
+                const real_t tmp_qloop_3 = -p_affine_0_1 + (p_affine_0_1 - p_affine_1_1)*_data_q_p_0[q] + (p_affine_0_1 - p_affine_2_1)*_data_q_p_1[q]; // negated component 1 of the mapped quadrature point
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; // squared distance of the mapped point from the origin
+                const real_t tmp_qloop_8 = pow(tmp_qloop_5, -0.50000000000000000)*tmp_qloop_7*1.0; // tmp_qloop_7 divided by the distance from the origin
+                const real_t tmp_qloop_9 = tmp_qloop_1*tmp_qloop_8;
+                const real_t tmp_qloop_10 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_1) + tmp_qloop_6*(rayVertex_1 + tmp_qloop_3);
+                const real_t tmp_qloop_11 = pow(tmp_qloop_5, -1.5000000000000000)*1.0; // inverse cube of the distance from the origin
+                const real_t tmp_qloop_12 = tmp_qloop_11*(radRayVertex + tmp_qloop_10*tmp_qloop_7);
+                const real_t tmp_qloop_13 = tmp_qloop_3*tmp_qloop_8;
+                const real_t tmp_qloop_14 = tmp_qloop_11*(radRayVertex + tmp_qloop_10*tmp_qloop_7); // same expression (and value) as tmp_qloop_12
+                const real_t tmp_qloop_15 = tmp_qloop_1*tmp_qloop_3;
+                const real_t tmp_qloop_16 = (k_dof_0*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]) + k_dof_1*_data_q_p_0[q] + k_dof_2*_data_q_p_1[q])*abs((tmp_qloop_0*tmp_qloop_13 + tmp_qloop_12*tmp_qloop_15)*(tmp_qloop_14*tmp_qloop_15 - tmp_qloop_6*tmp_qloop_9) + (tmp_qloop_0*tmp_qloop_9 - tmp_qloop_12*tmp_qloop_4)*(tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14*tmp_qloop_2))*_data_q_w[q]; // (k interpolated linearly at the quadrature point) * |blending term, presumably the blending-map Jacobian determinant — TODO confirm| * quadrature weight
+                const real_t q_tmp_0_0 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q];
+                const real_t q_tmp_0_1 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 1];
+                const real_t q_tmp_0_2 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 2];
+                const real_t q_tmp_1_1 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 3];
+                const real_t q_tmp_1_2 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 4];
+                const real_t q_tmp_2_2 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 5];
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2; // local matrix-vector product; the lower-triangle entries reuse the upper-triangle accumulators (symmetry)
+             const real_t elMatVec_1 = q_acc_0_1*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2;
+             const real_t elMatVec_2 = q_acc_0_2*src_dof_0 + q_acc_1_2*src_dof_1 + q_acc_2_2*src_dof_2;
+             _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // accumulate (+=) into dst so that contributions of all elements sharing a vertex add up
+             _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // reciprocal of the number of micro edges per macro edge
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); // macro vertex 0 moved one micro step towards macro vertex 1 (component 0)
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); // one micro step in the direction of macro vertex 2 (component 0)
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; // vertices of the BLUE micro-element adjacent to macro vertex 0
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; // 2x2 affine Jacobian: columns are the edge vectors (vertex 1 - vertex 0) and (vertex 2 - vertex 0)
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE); // computed once and reused for every BLUE element. NOTE(review): unqualified abs on real_t — confirm a floating-point overload (not ::abs(int)) is selected
+       {
+          /* FaceType.BLUE: micro-elements with vertices (ctr_0 + 1, ctr_1), (ctr_0, ctr_1 + 1), (ctr_0 + 1, ctr_1 + 1) */
+          const real_t _data_phi_psi_jac_affine_det_0_0_BLUE [] = {((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.01168626253704612)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871174)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871157)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196505)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196491)), ((real_t)(abs_det_jac_affine_BLUE*0.6672399574840655)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.01168626253704612)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871226)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196532)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871174)), ((real_t)(abs_det_jac_affine_BLUE*0.6672399574840655)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196491)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_BLUE*0.01168626253704612)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.6672399574840655)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196491)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196491)), 
((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122))}; // 6 entries per quadrature point, read below as local-matrix entries 00, 01, 02, 11, 12, 22; each is pre-multiplied by abs_det_jac_affine_BLUE
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) // rows of micro-elements
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) // row ctr_1 holds one BLUE element fewer than GRAY elements
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0; // equals ctr_0, so every [ctr_0 - phantom_ctr_0] lookup below reads element 0
+             real_t _data_float_loop_ctr_array_dim_0[4]; // loop counter ctr_0 (plus offsets 0..3) converted to real_t; only element 0 is read in this scalar kernel
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4]; // loop counter ctr_1 converted to real_t; only element 0 is read
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; // p_affine_i_c: component c of micro-element vertex i, interpolated from the macro vertices; vertex 0 sits at grid position (ctr_0 + 1, ctr_1)
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; // vertex 1 at grid position (ctr_0, ctr_1 + 1)
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; // vertex 2 at grid position (ctr_0 + 1, ctr_1 + 1)
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t src_dof_0 = _data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; // linear index of grid vertex (ctr_0 + 1, ctr_1): rows are stored back to back, row r having (micro_edges_per_macro_edge + 1 - r) entries
+             const real_t src_dof_1 = _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t src_dof_2 = _data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             const real_t k_dof_0 = _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; // coefficient k at the same three vertices as the src values
+             const real_t k_dof_1 = _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t k_dof_2 = _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             real_t q_acc_0_0 = 0.0; // accumulators for the upper triangle of the symmetric 3x3 local matrix
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             for (int64_t q = 0; q < 6; q += 1) // loop over the 6 quadrature points
+             {
+                const real_t tmp_qloop_1 = -p_affine_0_0 + (p_affine_0_0 - p_affine_1_0)*_data_q_p_0[q] + (p_affine_0_0 - p_affine_2_0)*_data_q_p_1[q]; // negated component 0 of the quadrature point mapped affinely onto the micro-element
+                const real_t tmp_qloop_2 = (tmp_qloop_1*tmp_qloop_1);
+                const real_t tmp_qloop_3 = -p_affine_0_1 + (p_affine_0_1 - p_affine_1_1)*_data_q_p_0[q] + (p_affine_0_1 - p_affine_2_1)*_data_q_p_1[q]; // negated component 1 of the mapped quadrature point
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; // squared distance of the mapped point from the origin
+                const real_t tmp_qloop_8 = pow(tmp_qloop_5, -0.50000000000000000)*tmp_qloop_7*1.0; // tmp_qloop_7 divided by the distance from the origin
+                const real_t tmp_qloop_9 = tmp_qloop_1*tmp_qloop_8;
+                const real_t tmp_qloop_10 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_1) + tmp_qloop_6*(rayVertex_1 + tmp_qloop_3);
+                const real_t tmp_qloop_11 = pow(tmp_qloop_5, -1.5000000000000000)*1.0; // inverse cube of the distance from the origin
+                const real_t tmp_qloop_12 = tmp_qloop_11*(radRayVertex + tmp_qloop_10*tmp_qloop_7);
+                const real_t tmp_qloop_13 = tmp_qloop_3*tmp_qloop_8;
+                const real_t tmp_qloop_14 = tmp_qloop_11*(radRayVertex + tmp_qloop_10*tmp_qloop_7); // same expression (and value) as tmp_qloop_12
+                const real_t tmp_qloop_15 = tmp_qloop_1*tmp_qloop_3;
+                const real_t tmp_qloop_16 = (k_dof_0*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]) + k_dof_1*_data_q_p_0[q] + k_dof_2*_data_q_p_1[q])*abs((tmp_qloop_0*tmp_qloop_13 + tmp_qloop_12*tmp_qloop_15)*(tmp_qloop_14*tmp_qloop_15 - tmp_qloop_6*tmp_qloop_9) + (tmp_qloop_0*tmp_qloop_9 - tmp_qloop_12*tmp_qloop_4)*(tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14*tmp_qloop_2))*_data_q_w[q]; // (k interpolated linearly at the quadrature point) * |blending term, presumably the blending-map Jacobian determinant — TODO confirm| * quadrature weight
+                const real_t q_tmp_0_0 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q];
+                const real_t q_tmp_0_1 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 1];
+                const real_t q_tmp_0_2 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 2];
+                const real_t q_tmp_1_1 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 3];
+                const real_t q_tmp_1_2 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 4];
+                const real_t q_tmp_2_2 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 5];
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+             }
+             const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2; // local matrix-vector product; the lower-triangle entries reuse the upper-triangle accumulators (symmetry)
+             const real_t elMatVec_1 = q_acc_0_1*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2;
+             const real_t elMatVec_2 = q_acc_0_2*src_dof_0 + q_acc_1_2*src_dof_1 + q_acc_2_2*src_dof_2;
+             _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; // accumulate (+=) into dst so that contributions of all elements sharing a vertex add up
+             _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..125fac4bc0b8ac56e9636c1f3da30fd86975a59f
--- /dev/null
+++ b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp
@@ -0,0 +1,231 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P1ElementwiseKMassAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P1ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT  _data_invDiag_, real_t * RESTRICT  _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{ // For every micro-element of one 2D macro face (GRAY pass, then BLUE pass) this adds the local diagonal entries (0,0), (1,1), (2,2), integrated with a 6-point quadrature and weighted by the coefficient in _data_k, onto _data_invDiag_ at the element's three vertices. Nothing is inverted here. NOTE(review): presumably the caller inverts the accumulated diagonal afterwards -- confirm.
+    {
+       const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
+   // _data_q_w (above): the 6 quadrature weights. _data_q_p_0 / _data_q_p_1 (below): reference coordinates of the 6 quadrature points.
+       const real_t _data_q_p_0 [] = {0.44594849091596489, 0.091576213509770715, 0.10810301816807022, 0.81684757298045851, 0.44594849091596489, 0.091576213509770715};
+   
+       const real_t _data_q_p_1 [] = {0.10810301816807022, 0.81684757298045851, 0.44594849091596489, 0.091576213509770715, 0.44594849091596489, 0.091576213509770715};
+   // Loop-invariant affine geometry of a GRAY micro-element: macro vertex 0 plus one step of 1/micro_edges_per_macro_edge_float towards macro vertex 1 and towards macro vertex 2.
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY); // |det| of the 2x2 affine Jacobian. NOTE(review): unqualified abs on real_t relies on a floating-point overload being visible -- confirm via the included header.
+       const real_t tmp_qloop_0 = rayVertex_1 - thrVertex_1; // tmp_qloop_0/6/7 depend only on the ray/ref/thr vertex arguments and are shared by both passes. NOTE(review): presumably the AnnulusMap blending parameters -- confirm.
+       const real_t tmp_qloop_6 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_7 = (radRayVertex - radRefVertex)*1.0 / (tmp_qloop_0*(rayVertex_0 - refVertex_0) - tmp_qloop_6*(rayVertex_1 - refVertex_1));
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_psi_jac_affine_det_0_0_GRAY [] = {((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.01168626253704612)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871174)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871157)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196505)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196491)), ((real_t)(abs_det_jac_affine_GRAY*0.6672399574840655)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.01168626253704612)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871226)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196532)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871174)), ((real_t)(abs_det_jac_affine_GRAY*0.6672399574840655)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196491)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_GRAY*0.01168626253704612)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.6672399574840655)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196491)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196491)), 
((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122))};
+      // The table above has 36 entries (6 per quadrature point), each pre-multiplied by abs_det_jac_affine_GRAY. This function reads only offsets 6*q, 6*q + 3 and 6*q + 5. NOTE(review): presumably the packed upper triangle of a symmetric 3x3 matrix -- confirm.
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) // row ctr_1 contains (micro_edges_per_macro_edge - ctr_1) GRAY elements
+          {
+         // phantom_ctr_0 is set to ctr_0, so every "[ctr_0 - phantom_ctr_0]" below reads index 0 of the two float arrays; entries 1..3 are written but never read in this function.
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         // p_affine_<v>_<c>: component c of vertex v of the current GRAY element, i.e. the grid points (ctr_0, ctr_1), (ctr_0 + 1, ctr_1), (ctr_0, ctr_1 + 1) mapped affinely into the macro face.
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t k_dof_0 = _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // linear index of grid point (ctr_0, ctr_1); by this formula consecutive rows start (micro_edges_per_macro_edge + 1 - ctr_1) entries apart
+             const real_t k_dof_1 = _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; // grid point (ctr_0 + 1, ctr_1)
+             const real_t k_dof_2 = _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; // grid point (ctr_0, ctr_1 + 1)
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             for (int64_t q = 0; q < 6; q += 1) // loop over the 6 quadrature points
+             {
+                const real_t tmp_qloop_1 = -p_affine_0_0 + (p_affine_0_0 - p_affine_1_0)*_data_q_p_0[q] + (p_affine_0_0 - p_affine_2_0)*_data_q_p_1[q]; // = -(component 0 of the affinely mapped quadrature point)
+                const real_t tmp_qloop_2 = (tmp_qloop_1*tmp_qloop_1);
+                const real_t tmp_qloop_3 = -p_affine_0_1 + (p_affine_0_1 - p_affine_1_1)*_data_q_p_0[q] + (p_affine_0_1 - p_affine_2_1)*_data_q_p_1[q]; // = -(component 1 of the affinely mapped quadrature point)
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; // squared distance of the mapped quadrature point from the origin; used below with exponents -0.5 and -1.5
+                const real_t tmp_qloop_8 = pow(tmp_qloop_5, -0.50000000000000000)*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_9 = tmp_qloop_1*tmp_qloop_8;
+                const real_t tmp_qloop_10 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_1) + tmp_qloop_6*(rayVertex_1 + tmp_qloop_3);
+                const real_t tmp_qloop_11 = pow(tmp_qloop_5, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_12 = tmp_qloop_11*(radRayVertex + tmp_qloop_10*tmp_qloop_7);
+                const real_t tmp_qloop_13 = tmp_qloop_3*tmp_qloop_8;
+                const real_t tmp_qloop_14 = tmp_qloop_11*(radRayVertex + tmp_qloop_10*tmp_qloop_7); // same expression as tmp_qloop_12 (emitted twice by the generator)
+                const real_t tmp_qloop_15 = tmp_qloop_1*tmp_qloop_3;
+                const real_t tmp_qloop_16 = (k_dof_0*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]) + k_dof_1*_data_q_p_0[q] + k_dof_2*_data_q_p_1[q])*abs((tmp_qloop_0*tmp_qloop_13 + tmp_qloop_12*tmp_qloop_15)*(tmp_qloop_14*tmp_qloop_15 - tmp_qloop_6*tmp_qloop_9) + (tmp_qloop_0*tmp_qloop_9 - tmp_qloop_12*tmp_qloop_4)*(tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14*tmp_qloop_2))*_data_q_w[q]; // (k interpolated linearly from its three vertex values) * abs(...) * quadrature weight. NOTE(review): the abs(...) factor is presumably |det| of the blending-map Jacobian -- confirm.
+                const real_t q_tmp_0_0 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q]; // diagonal entries only: table offsets 0, 3 and 5 of quadrature point q
+                const real_t q_tmp_1_1 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 3];
+                const real_t q_tmp_2_2 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 5];
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+             }
+             const real_t elMatDiag_0 = q_acc_0_0;
+             const real_t elMatDiag_1 = q_acc_1_1;
+             const real_t elMatDiag_2 = q_acc_2_2;
+             _data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; // accumulate onto the existing values at the same three grid points that k was read from
+             _data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             _data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; // loop-invariant affine geometry of a BLUE micro-element; its three reference vertices are built in p_affine_const_*_BLUE below
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE); // |det| of the 2x2 affine Jacobian of a BLUE micro-element
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_psi_jac_affine_det_0_0_BLUE [] = {((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.01168626253704612)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871174)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871157)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196505)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196491)), ((real_t)(abs_det_jac_affine_BLUE*0.6672399574840655)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.01168626253704612)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871226)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196532)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871174)), ((real_t)(abs_det_jac_affine_BLUE*0.6672399574840655)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196491)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_BLUE*0.01168626253704612)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.6672399574840655)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196491)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196491)), 
((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122))};
+      // Same 36 numeric factors as the GRAY table, here scaled by abs_det_jac_affine_BLUE; again only offsets 6*q, 6*q + 3 and 6*q + 5 are read.
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) // row ctr_1 contains one BLUE element fewer than GRAY elements
+          {
+         // As in the GRAY pass, phantom_ctr_0 is set to ctr_0, so only index 0 of the two float arrays is ever read.
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         // Vertices of the current BLUE element: grid points (ctr_0 + 1, ctr_1), (ctr_0, ctr_1 + 1), (ctr_0 + 1, ctr_1 + 1) mapped affinely into the macro face.
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t k_dof_0 = _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; // grid point (ctr_0 + 1, ctr_1)
+             const real_t k_dof_1 = _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; // grid point (ctr_0, ctr_1 + 1)
+             const real_t k_dof_2 = _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; // grid point (ctr_0 + 1, ctr_1 + 1)
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             for (int64_t q = 0; q < 6; q += 1) // same per-quadrature-point computation as in the GRAY pass, using the BLUE vertices and table
+             {
+                const real_t tmp_qloop_1 = -p_affine_0_0 + (p_affine_0_0 - p_affine_1_0)*_data_q_p_0[q] + (p_affine_0_0 - p_affine_2_0)*_data_q_p_1[q]; // = -(component 0 of the affinely mapped quadrature point)
+                const real_t tmp_qloop_2 = (tmp_qloop_1*tmp_qloop_1);
+                const real_t tmp_qloop_3 = -p_affine_0_1 + (p_affine_0_1 - p_affine_1_1)*_data_q_p_0[q] + (p_affine_0_1 - p_affine_2_1)*_data_q_p_1[q]; // = -(component 1 of the affinely mapped quadrature point)
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; // squared distance of the mapped quadrature point from the origin
+                const real_t tmp_qloop_8 = pow(tmp_qloop_5, -0.50000000000000000)*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_9 = tmp_qloop_1*tmp_qloop_8;
+                const real_t tmp_qloop_10 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_1) + tmp_qloop_6*(rayVertex_1 + tmp_qloop_3);
+                const real_t tmp_qloop_11 = pow(tmp_qloop_5, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_12 = tmp_qloop_11*(radRayVertex + tmp_qloop_10*tmp_qloop_7);
+                const real_t tmp_qloop_13 = tmp_qloop_3*tmp_qloop_8;
+                const real_t tmp_qloop_14 = tmp_qloop_11*(radRayVertex + tmp_qloop_10*tmp_qloop_7); // same expression as tmp_qloop_12 (emitted twice by the generator)
+                const real_t tmp_qloop_15 = tmp_qloop_1*tmp_qloop_3;
+                const real_t tmp_qloop_16 = (k_dof_0*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]) + k_dof_1*_data_q_p_0[q] + k_dof_2*_data_q_p_1[q])*abs((tmp_qloop_0*tmp_qloop_13 + tmp_qloop_12*tmp_qloop_15)*(tmp_qloop_14*tmp_qloop_15 - tmp_qloop_6*tmp_qloop_9) + (tmp_qloop_0*tmp_qloop_9 - tmp_qloop_12*tmp_qloop_4)*(tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14*tmp_qloop_2))*_data_q_w[q]; // (linearly interpolated k) * abs(...) * quadrature weight
+                const real_t q_tmp_0_0 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q]; // diagonal entries only: table offsets 0, 3 and 5 of quadrature point q
+                const real_t q_tmp_1_1 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 3];
+                const real_t q_tmp_2_2 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 5];
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+             }
+             const real_t elMatDiag_0 = q_acc_0_0;
+             const real_t elMatDiag_1 = q_acc_1_1;
+             const real_t elMatDiag_2 = q_acc_2_2;
+             _data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; // accumulate onto the existing values at the BLUE element's three grid points
+             _data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             _data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..86e9f346908ecbfc26385a30f089913d64cc3e2f
--- /dev/null
+++ b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp
@@ -0,0 +1,311 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG form generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#include "../P1ElementwiseKMassAnnulusMap.hpp"
+
+#define FUNC_PREFIX  
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+void P1ElementwiseKMassAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT  _data_dst, real_t * RESTRICT  _data_k, idx_t * RESTRICT  _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const
+{
+    {
+       const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949};
+   
+       const real_t _data_q_p_0 [] = {0.44594849091596489, 0.091576213509770715, 0.10810301816807022, 0.81684757298045851, 0.44594849091596489, 0.091576213509770715};
+   
+       const real_t _data_q_p_1 [] = {0.10810301816807022, 0.81684757298045851, 0.44594849091596489, 0.091576213509770715, 0.44594849091596489, 0.091576213509770715};
+   
+       const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0;
+       const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1;
+       const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY;
+       const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY;
+       const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY;
+       const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY;
+       const real_t abs_det_jac_affine_GRAY = abs(jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY);
+       const real_t tmp_qloop_0 = rayVertex_1 - thrVertex_1;
+       const real_t tmp_qloop_6 = rayVertex_0 - thrVertex_0;
+       const real_t tmp_qloop_7 = (radRayVertex - radRefVertex)*1.0 / (tmp_qloop_0*(rayVertex_0 - refVertex_0) - tmp_qloop_6*(rayVertex_1 - refVertex_1));
+       {
+          /* FaceType.GRAY */
+          const real_t _data_phi_psi_jac_affine_det_0_0_GRAY [] = {((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.01168626253704612)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871174)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871157)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196505)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196491)), ((real_t)(abs_det_jac_affine_GRAY*0.6672399574840655)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.01168626253704612)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871226)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196532)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871174)), ((real_t)(abs_det_jac_affine_GRAY*0.6672399574840655)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196491)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_GRAY*0.01168626253704612)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.04820837781551205)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.19887005655022641)), ((real_t)(abs_det_jac_affine_GRAY*0.6672399574840655)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196491)), ((real_t)(abs_det_jac_affine_GRAY*0.074803807748196491)), 
((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_GRAY*0.0083862028807871122))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t k_dof_0 = _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))];
+             const real_t k_dof_1 = _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t k_dof_2 = _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             for (int64_t q = 0; q < 6; q += 1)
+             {
+                const real_t tmp_qloop_1 = -p_affine_0_0 + (p_affine_0_0 - p_affine_1_0)*_data_q_p_0[q] + (p_affine_0_0 - p_affine_2_0)*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = (tmp_qloop_1*tmp_qloop_1);
+                const real_t tmp_qloop_3 = -p_affine_0_1 + (p_affine_0_1 - p_affine_1_1)*_data_q_p_0[q] + (p_affine_0_1 - p_affine_2_1)*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                const real_t tmp_qloop_8 = pow(tmp_qloop_5, -0.50000000000000000)*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_9 = tmp_qloop_1*tmp_qloop_8;
+                const real_t tmp_qloop_10 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_1) + tmp_qloop_6*(rayVertex_1 + tmp_qloop_3);
+                const real_t tmp_qloop_11 = pow(tmp_qloop_5, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_12 = tmp_qloop_11*(radRayVertex + tmp_qloop_10*tmp_qloop_7);
+                const real_t tmp_qloop_13 = tmp_qloop_3*tmp_qloop_8;
+                const real_t tmp_qloop_14 = tmp_qloop_11*(radRayVertex + tmp_qloop_10*tmp_qloop_7);
+                const real_t tmp_qloop_15 = tmp_qloop_1*tmp_qloop_3;
+                const real_t tmp_qloop_16 = (k_dof_0*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]) + k_dof_1*_data_q_p_0[q] + k_dof_2*_data_q_p_1[q])*abs((tmp_qloop_0*tmp_qloop_13 + tmp_qloop_12*tmp_qloop_15)*(tmp_qloop_14*tmp_qloop_15 - tmp_qloop_6*tmp_qloop_9) + (tmp_qloop_0*tmp_qloop_9 - tmp_qloop_12*tmp_qloop_4)*(tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14*tmp_qloop_2))*_data_q_w[q];
+                const real_t q_tmp_0_0 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q];
+                const real_t q_tmp_0_1 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 1];
+                const real_t q_tmp_0_2 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 2];
+                const real_t q_tmp_1_1 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 3];
+                const real_t q_tmp_1_2 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 4];
+                const real_t q_tmp_2_2 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_GRAY[6*q + 5];
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_1_0 = q_acc_0_1;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_2_0 = q_acc_0_2;
+             const real_t elMat_2_1 = q_acc_1_2;
+             const real_t elMat_2_2 = q_acc_2_2;
+         
+             std::vector< uint_t > _data_rowIdx( 3 );
+             std::vector< uint_t > _data_colIdx( 3 );
+             std::vector< real_t > _data_mat( 9 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[0] = ((uint64_t)(_data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]));
+             _data_colIdx[1] = ((uint64_t)(_data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[2] = ((uint64_t)(_data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_1_0));
+             _data_mat[4] = ((real_t)(elMat_1_1));
+             _data_mat[5] = ((real_t)(elMat_1_2));
+             _data_mat[6] = ((real_t)(elMat_2_0));
+             _data_mat[7] = ((real_t)(elMat_2_1));
+             _data_mat[8] = ((real_t)(elMat_2_2));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+       const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0;
+       const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0);
+       const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1);
+       const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0);
+       const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1);
+       const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE;
+       const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE;
+       const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE;
+       const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE;
+       const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE;
+       const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE;
+       const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE;
+       const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE;
+       const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE;
+       const real_t abs_det_jac_affine_BLUE = abs(jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE);
+       {
+          /* FaceType.BLUE */
+          const real_t _data_phi_psi_jac_affine_det_0_0_BLUE [] = {((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.01168626253704612)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871174)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871157)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196505)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196491)), ((real_t)(abs_det_jac_affine_BLUE*0.6672399574840655)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.01168626253704612)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871226)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196532)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871174)), ((real_t)(abs_det_jac_affine_BLUE*0.6672399574840655)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196491)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_BLUE*0.01168626253704612)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.04820837781551205)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.19887005655022641)), ((real_t)(abs_det_jac_affine_BLUE*0.6672399574840655)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196491)), ((real_t)(abs_det_jac_affine_BLUE*0.074803807748196491)), 
((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122)), ((real_t)(abs_det_jac_affine_BLUE*0.0083862028807871122))};
+      
+          for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1)
+          for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1)
+          {
+         
+             const int64_t phantom_ctr_0 = ctr_0;
+             real_t _data_float_loop_ctr_array_dim_0[4];
+             _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0;
+             _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1;
+             _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2;
+             _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3;
+             real_t _data_float_loop_ctr_array_dim_1[4];
+             _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1;
+             _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1;
+         
+             const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0];
+             const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0;
+             const real_t k_dof_0 = _data_k[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1];
+             const real_t k_dof_1 = _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))];
+             const real_t k_dof_2 = _data_k[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1];
+             real_t q_acc_0_0 = 0.0;
+             real_t q_acc_0_1 = 0.0;
+             real_t q_acc_0_2 = 0.0;
+             real_t q_acc_1_1 = 0.0;
+             real_t q_acc_1_2 = 0.0;
+             real_t q_acc_2_2 = 0.0;
+             for (int64_t q = 0; q < 6; q += 1)
+             {
+                const real_t tmp_qloop_1 = -p_affine_0_0 + (p_affine_0_0 - p_affine_1_0)*_data_q_p_0[q] + (p_affine_0_0 - p_affine_2_0)*_data_q_p_1[q];
+                const real_t tmp_qloop_2 = (tmp_qloop_1*tmp_qloop_1);
+                const real_t tmp_qloop_3 = -p_affine_0_1 + (p_affine_0_1 - p_affine_1_1)*_data_q_p_0[q] + (p_affine_0_1 - p_affine_2_1)*_data_q_p_1[q];
+                const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3);
+                const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4;
+                const real_t tmp_qloop_8 = pow(tmp_qloop_5, -0.50000000000000000)*tmp_qloop_7*1.0;
+                const real_t tmp_qloop_9 = tmp_qloop_1*tmp_qloop_8;
+                const real_t tmp_qloop_10 = -tmp_qloop_0*(rayVertex_0 + tmp_qloop_1) + tmp_qloop_6*(rayVertex_1 + tmp_qloop_3);
+                const real_t tmp_qloop_11 = pow(tmp_qloop_5, -1.5000000000000000)*1.0;
+                const real_t tmp_qloop_12 = tmp_qloop_11*(radRayVertex + tmp_qloop_10*tmp_qloop_7);
+                const real_t tmp_qloop_13 = tmp_qloop_3*tmp_qloop_8;
+                const real_t tmp_qloop_14 = tmp_qloop_11*(radRayVertex + tmp_qloop_10*tmp_qloop_7);
+                const real_t tmp_qloop_15 = tmp_qloop_1*tmp_qloop_3;
+                const real_t tmp_qloop_16 = (k_dof_0*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]) + k_dof_1*_data_q_p_0[q] + k_dof_2*_data_q_p_1[q])*abs((tmp_qloop_0*tmp_qloop_13 + tmp_qloop_12*tmp_qloop_15)*(tmp_qloop_14*tmp_qloop_15 - tmp_qloop_6*tmp_qloop_9) + (tmp_qloop_0*tmp_qloop_9 - tmp_qloop_12*tmp_qloop_4)*(tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14*tmp_qloop_2))*_data_q_w[q];
+                const real_t q_tmp_0_0 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q];
+                const real_t q_tmp_0_1 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 1];
+                const real_t q_tmp_0_2 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 2];
+                const real_t q_tmp_1_1 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 3];
+                const real_t q_tmp_1_2 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 4];
+                const real_t q_tmp_2_2 = tmp_qloop_16*_data_phi_psi_jac_affine_det_0_0_BLUE[6*q + 5];
+                q_acc_0_0 = q_acc_0_0 + q_tmp_0_0;
+                q_acc_0_1 = q_acc_0_1 + q_tmp_0_1;
+                q_acc_0_2 = q_acc_0_2 + q_tmp_0_2;
+                q_acc_1_1 = q_acc_1_1 + q_tmp_1_1;
+                q_acc_1_2 = q_acc_1_2 + q_tmp_1_2;
+                q_acc_2_2 = q_acc_2_2 + q_tmp_2_2;
+             }
+             const real_t elMat_0_0 = q_acc_0_0;
+             const real_t elMat_0_1 = q_acc_0_1;
+             const real_t elMat_0_2 = q_acc_0_2;
+             const real_t elMat_1_0 = q_acc_0_1;
+             const real_t elMat_1_1 = q_acc_1_1;
+             const real_t elMat_1_2 = q_acc_1_2;
+             const real_t elMat_2_0 = q_acc_0_2;
+             const real_t elMat_2_1 = q_acc_1_2;
+             const real_t elMat_2_2 = q_acc_2_2;
+         
+             std::vector< uint_t > _data_rowIdx( 3 );
+             std::vector< uint_t > _data_colIdx( 3 );
+             std::vector< real_t > _data_mat( 9 );
+         
+             _data_rowIdx[0] = ((uint64_t)(_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_rowIdx[1] = ((uint64_t)(_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_rowIdx[2] = ((uint64_t)(_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+             _data_colIdx[0] = ((uint64_t)(_data_src[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]));
+             _data_colIdx[1] = ((uint64_t)(_data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]));
+             _data_colIdx[2] = ((uint64_t)(_data_src[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]));
+         
+             /* Apply basis transformation */
+         
+         
+         
+             _data_mat[0] = ((real_t)(elMat_0_0));
+             _data_mat[1] = ((real_t)(elMat_0_1));
+             _data_mat[2] = ((real_t)(elMat_0_2));
+             _data_mat[3] = ((real_t)(elMat_1_0));
+             _data_mat[4] = ((real_t)(elMat_1_1));
+             _data_mat[5] = ((real_t)(elMat_1_2));
+             _data_mat[6] = ((real_t)(elMat_2_0));
+             _data_mat[7] = ((real_t)(elMat_2_1));
+             _data_mat[8] = ((real_t)(elMat_2_2));
+         
+         
+             mat->addValues( _data_rowIdx, _data_colIdx, _data_mat );
+          }
+       }
+    }
+}
+} // namespace operatorgeneration
+
+} // namespace hyteg